[llvm] [AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942 (PR #145052)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 08:31:45 PDT 2025
https://github.com/JanekvO created https://github.com/llvm/llvm-project/pull/145052
Any subtarget with full mov64 support may be able to take advantage of build_vector with 64b vector elts. This changes build_vector selection to support 64b elements and adds a build_vector combine that tries to combine a build_vector with 32b elements into its 64b equivalent.
>From 8fd418169985a4264faa0fece62552c596b7d7d3 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <Janek.vanOirschot at amd.com>
Date: Fri, 20 Jun 2025 03:24:29 -0700
Subject: [PATCH] [AMDGPU] Legalize 64bit elements for BUILD_VECTOR on gfx942
---
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 22 +-
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 8 +
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 114 +-
llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 +
llvm/test/CodeGen/AMDGPU/bf16-conversions.ll | 28 +-
.../AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll | 239 +-
.../AMDGPU/buffer-atomic-fadd.f32-rtn.ll | 251 +-
.../CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll | 832 +-
.../AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll | 239 +-
.../AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll | 251 +-
llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 48 +-
llvm/test/CodeGen/AMDGPU/fmaximum3.ll | 2 +-
llvm/test/CodeGen/AMDGPU/fminimum3.ll | 2 +-
llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll | 16 +-
.../CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll | 159 +-
llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll | 590 +-
llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll | 590 +-
.../CodeGen/AMDGPU/masked-load-vectortypes.ll | 29 +-
llvm/test/CodeGen/AMDGPU/maximumnum.ll | 1461 ++-
llvm/test/CodeGen/AMDGPU/minimumnum.ll | 1461 ++-
llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 429 +-
llvm/test/CodeGen/AMDGPU/preload-kernargs.ll | 8 +-
.../AMDGPU/shufflevector.v2i64.v2i64.ll | 234 +-
.../AMDGPU/shufflevector.v2i64.v3i64.ll | 575 +-
.../AMDGPU/shufflevector.v2i64.v4i64.ll | 1273 ++-
.../AMDGPU/shufflevector.v2i64.v8i64.ll | 4339 +++++----
.../CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll | 234 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll | 575 +-
.../CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll | 1273 ++-
.../AMDGPU/shufflevector.v3i64.v2i64.ll | 685 +-
.../AMDGPU/shufflevector.v3i64.v3i64.ll | 1842 ++--
.../AMDGPU/shufflevector.v3i64.v4i64.ll | 5059 +++++-----
.../CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll | 685 +-
.../CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll | 1842 ++--
.../CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll | 5059 +++++-----
.../AMDGPU/shufflevector.v4i64.v2i64.ll | 2801 ++++--
.../AMDGPU/shufflevector.v4i64.v3i64.ll | 2389 +++--
.../AMDGPU/shufflevector.v4i64.v4i64.ll | 8424 +++++++++--------
.../CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll | 2801 ++++--
.../CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll | 2389 +++--
.../CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll | 8424 +++++++++--------
llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll | 25 +-
.../test/CodeGen/AMDGPU/vector-reduce-fmax.ll | 7 +-
.../test/CodeGen/AMDGPU/vector-reduce-fmin.ll | 5 +-
.../test/CodeGen/AMDGPU/vni8-across-blocks.ll | 5 +-
45 files changed, 33984 insertions(+), 23741 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6e990cb2e160c..044c073e7f918 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -440,6 +440,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
EVT EltVT = VT.getVectorElementType();
SDLoc DL(N);
SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
+ unsigned NumRegs = EltVT.getSizeInBits() / 32;
+ bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
if (NumVectorElts == 1) {
CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
@@ -449,12 +451,13 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
"supported yet");
+ assert((IsGCN || (!IsGCN && NumRegs == 1)) &&
+ "R600 does not support 64-bit reg_seq elements");
// 32 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
@@ -464,8 +467,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
IsRegSeq = false;
break;
}
- unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
- : R600RegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub =
+ IsGCN ? SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
}
@@ -475,8 +479,9 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, EltVT);
for (unsigned i = NOps; i < NumVectorElts; ++i) {
- unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
- : R600RegisterInfo::getSubRegFromChannel(i);
+ unsigned Sub =
+ IsGCN ? SIRegisterInfo::getSubRegFromChannel(i * NumRegs, NumRegs)
+ : R600RegisterInfo::getSubRegFromChannel(i);
RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(Sub, DL, MVT::i32);
@@ -644,9 +649,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
break;
}
- assert(VT.getVectorElementType().bitsEq(MVT::i32));
+ EVT VET = VT.getVectorElementType();
+ assert(VET.bitsEq(MVT::i32) || VET.bitsEq(MVT::i64));
+ unsigned EltSize = VET.getSizeInBits();
unsigned RegClassID =
- SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
+ SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * EltSize)
+ ->getID();
SelectBuildVector(N, RegClassID);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 134adc681215f..8a12b9b25d713 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5206,6 +5206,14 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::BITCAST: {
EVT DestVT = N->getValueType(0);
+ // Avoid undoing build_vector with 64b elements if subtarget supports 64b
+ // movs (i.e., avoid inf loop through combines).
+ if (Subtarget->isGCN()) {
+ const GCNSubtarget &ST = DAG.getSubtarget<GCNSubtarget>();
+ if (ST.hasMovB64())
+ break;
+ }
+
// Push casts through vector builds. This helps avoid emitting a large
// number of copies when materializing floating point vector constants.
//
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 07d79d677104a..2dc12357cb4b0 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -357,9 +357,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// Most operations are naturally 32-bit vector operations. We only support
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
-
+ if (STI.hasMovB64())
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal);
+ else {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
@@ -371,9 +374,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v3i64, MVT::v3f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
-
+ if (STI.hasMovB64())
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal);
+ else {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v6i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v6i32);
@@ -385,9 +391,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v4i64, MVT::v4f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
-
+ if (STI.hasMovB64())
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal);
+ else {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v8i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v8i32);
@@ -399,9 +408,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v8i64, MVT::v8f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
-
+ if (STI.hasMovB64())
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal);
+ else {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v16i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v16i32);
@@ -413,9 +425,12 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
for (MVT Vec64 : {MVT::v16i64, MVT::v16f64}) {
- setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
- AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
-
+ if (STI.hasMovB64())
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Legal);
+ else {
+ setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
+ AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v32i32);
+ }
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v32i32);
@@ -945,6 +960,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
setTargetDAGCombine({ISD::ADD,
+ ISD::BUILD_VECTOR,
ISD::UADDO_CARRY,
ISD::SUB,
ISD::USUBO_CARRY,
@@ -15486,6 +15502,72 @@ SDValue SITargetLowering::performClampCombine(SDNode *N,
return SDValue(CSrc, 0);
}
+/// Combine a constant BUILD_VECTOR whose elements are narrower than 64 bits
+/// into an equivalent value built from i64 constants, bitcast back to the
+/// original vector type.
+///
+/// Only runs at or after the AfterLegalizeDAG combine level and only on
+/// subtargets for which hasMovB64() is true. Returns an empty SDValue when
+/// the combine does not apply.
+SDValue
+SITargetLowering::performBuildVectorCombine(SDNode *N,
+                                            DAGCombinerInfo &DCI) const {
+  const GCNSubtarget *ST = getSubtarget();
+  if (DCI.Level < AfterLegalizeDAG || !ST->hasMovB64())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDLoc SL(N);
+  // This combine is only dispatched for ISD::BUILD_VECTOR, so use cast<>
+  // (which asserts) rather than an unchecked dyn_cast<>.
+  auto *BVN = cast<BuildVectorSDNode>(N);
+
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned SizeBits = VT.getSizeInBits();
+  unsigned EltSize = EltVT.getSizeInBits();
+
+  // Skip if:
+  // - Value type isn't a multiple of 64 bits (e.g., v3i32), or
+  // - BuildVector instruction has non-constants, or
+  // - Elements are already 64 bits wide (nothing to widen) or wider (they
+  //   cannot be packed into a 64-bit immediate).
+  if ((SizeBits % 64) != 0 || !BVN->isConstant() || EltSize >= 64)
+    return SDValue();
+
+  // Construct the 64b values by packing consecutive elements, lowest element
+  // in the lowest bits.
+  SmallVector<uint64_t, 8> ImmVals;
+  uint64_t ImmVal = 0;
+  uint64_t ImmSize = 0;
+  for (SDValue Opand : N->ops()) {
+    // Bails out on anything that is not an integer constant (this includes
+    // undef and floating-point constant operands).
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Opand);
+    if (!C)
+      return SDValue();
+
+    // BUILD_VECTOR integer operands may be wider than the element type and
+    // are implicitly truncated, so only keep the low EltSize bits.
+    uint64_t EltVal = C->getAPIntValue().zextOrTrunc(EltSize).getZExtValue();
+    ImmVal |= EltVal << ImmSize;
+    ImmSize += EltSize;
+    if (ImmSize > 64)
+      return SDValue();
+    if (ImmSize == 64) {
+      // Only accept 64b values whose upper 32 bits are zero.
+      // NOTE(review): presumably this mirrors the 32-bit literal limit of the
+      // 64-bit mov instructions -- confirm.
+      if (!isUInt<32>(ImmVal))
+        return SDValue();
+      ImmVals.push_back(ImmVal);
+      ImmVal = 0;
+      ImmSize = 0;
+    }
+  }
+
+  // Avoid emitting build_vector with 1 element and directly emit value.
+  // Bitcast to the node's own type: the replacement must have the same type
+  // as N, which is not necessarily v2i32 (e.g., v4i16).
+  if (ImmVals.size() == 1) {
+    SDValue Val = DAG.getConstant(ImmVals[0], SL, MVT::i64);
+    return DAG.getBitcast(VT, Val);
+  }
+
+  // Construct and return build_vector with 64b elements.
+  if (!ImmVals.empty()) {
+    SmallVector<SDValue, 8> VectorConsts;
+    VectorConsts.reserve(ImmVals.size());
+    for (uint64_t I : ImmVals)
+      VectorConsts.push_back(DAG.getConstant(I, SL, MVT::i64));
+    unsigned NewNumElts = SizeBits / 64;
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT NewVT = EVT::getVectorVT(Ctx, MVT::i64, NewNumElts);
+    SDValue BV = DAG.getBuildVector(NewVT, SL, VectorConsts);
+    return DAG.getBitcast(VT, BV);
+  }
+  return SDValue();
+}
+
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
@@ -15573,6 +15655,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
+ case ISD::BUILD_VECTOR:
+ return performBuildVectorCombine(N, DCI);
case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 89fb12b52c3e6..8be0631b37dd2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -231,6 +231,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClampCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performBuildVectorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
diff --git a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
index a597faa028f22..8c48f9a4a7b5b 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16-conversions.ll
@@ -152,24 +152,24 @@ define amdgpu_ps float @v_test_cvt_v2f64_v2bf16_v(<2 x double> %src) {
; GFX-950: ; %bb.0:
; GFX-950-NEXT: v_cvt_f32_f64_e32 v6, v[2:3]
; GFX-950-NEXT: v_cvt_f64_f32_e32 v[4:5], v6
-; GFX-950-NEXT: v_and_b32_e32 v7, 1, v6
-; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[2:3]|, |v[4:5]|
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[0:1], |v[2:3]|, |v[4:5]|
; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[2:3], v[4:5]
-; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v7
-; GFX-950-NEXT: v_cndmask_b32_e64 v2, -1, 1, s[2:3]
-; GFX-950-NEXT: v_add_u32_e32 v2, v6, v2
+; GFX-950-NEXT: v_and_b32_e32 v2, 1, v6
+; GFX-950-NEXT: v_cndmask_b32_e64 v7, -1, 1, s[0:1]
+; GFX-950-NEXT: v_cvt_f32_f64_e32 v8, v[0:1]
+; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v2
+; GFX-950-NEXT: v_add_u32_e32 v7, v6, v7
; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
-; GFX-950-NEXT: v_cvt_f32_f64_e32 v5, v[0:1]
-; GFX-950-NEXT: v_cndmask_b32_e32 v4, v2, v6, vcc
-; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v5
-; GFX-950-NEXT: v_and_b32_e32 v6, 1, v5
-; GFX-950-NEXT: v_cmp_gt_f64_e64 s[2:3], |v[0:1]|, |v[2:3]|
+; GFX-950-NEXT: v_cvt_f64_f32_e32 v[2:3], v8
+; GFX-950-NEXT: v_cndmask_b32_e32 v4, v7, v6, vcc
+; GFX-950-NEXT: v_cmp_gt_f64_e64 s[0:1], |v[0:1]|, |v[2:3]|
; GFX-950-NEXT: v_cmp_nlg_f64_e32 vcc, v[0:1], v[2:3]
-; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v6
-; GFX-950-NEXT: v_cndmask_b32_e64 v0, -1, 1, s[2:3]
-; GFX-950-NEXT: v_add_u32_e32 v0, v5, v0
+; GFX-950-NEXT: v_and_b32_e32 v0, 1, v8
+; GFX-950-NEXT: v_cndmask_b32_e64 v5, -1, 1, s[0:1]
+; GFX-950-NEXT: v_cmp_eq_u32_e64 s[0:1], 1, v0
+; GFX-950-NEXT: v_add_u32_e32 v5, v8, v5
; GFX-950-NEXT: s_or_b64 vcc, vcc, s[0:1]
-; GFX-950-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
+; GFX-950-NEXT: v_cndmask_b32_e32 v0, v5, v8, vcc
; GFX-950-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
; GFX-950-NEXT: ; return to shader part epilog
%res = fptrunc <2 x double> %src to <2 x bfloat>
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
index a14114358433a..9b783ed9d7772 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908_GFX11 %s
define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
@@ -167,25 +167,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -212,26 +228,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
@@ -258,26 +291,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr a
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -306,28 +356,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908_GFX11-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
index eb452dc4b874f..c22360b861a21 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX11 %s
define amdgpu_ps float @buffer_atomic_fadd_f32_offset_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
@@ -153,26 +153,43 @@ define amdgpu_ps float @buffer_atomic_fadd_f32_bothen_rtn(float %val, <4 x i32>
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFSET_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -199,27 +216,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offset_rtn(float %val, ptr ad
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_OFFEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -247,27 +282,45 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_offen_rtn(float %val, ptr add
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_IDXEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_rtn
; GFX11: bb.0 (%ir-block.0):
@@ -295,29 +348,49 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_idxen_rtn(float %val, ptr add
}
define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_ADD_F32_BOTHEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
;
; GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_rtn
; GFX11: bb.0 (%ir-block.0):
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
index 37928a78622a6..fe0e9fd1d7fe5 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn
@@ -207,251 +207,655 @@ define amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET killed [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN killed [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN killed [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: S_ENDPGM 0
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY7]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY8]], implicit $exec
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[COPY11]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY12]], implicit $exec
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY8]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY9]], implicit $exec
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[COPY12]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY13]], implicit $exec
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret double %ret
}
define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
- ; GFX90A_GFX942-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
- ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
- ; GFX90A_GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
- ; GFX90A_GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFX90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFX90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
+ ; GFX90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[COPY9:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY9]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY10]], implicit $exec
+ ; GFX942-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFX942-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY11]], implicit $exec
+ ; GFX942-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFX942-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+ ;
+ ; GFx90A-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn
+ ; GFx90A: bb.0 (%ir-block.0):
+ ; GFx90A-NEXT: liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr2, $vgpr3, $sgpr4
+ ; GFx90A-NEXT: {{ $}}
+ ; GFx90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFx90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; GFx90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFx90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFx90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFx90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFx90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFx90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFx90A-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFx90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFx90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFx90A-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFx90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY12]], %subreg.sub0, killed [[COPY11]], %subreg.sub1, killed [[COPY10]], %subreg.sub2, killed [[COPY9]], %subreg.sub3
+ ; GFx90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY7]], %subreg.sub1
+ ; GFx90A-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFx90A-NEXT: [[COPY13:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE3]]
+ ; GFx90A-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[COPY13]], killed [[REG_SEQUENCE4]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8)
+ ; GFx90A-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY14]], implicit $exec
+ ; GFx90A-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1
+ ; GFx90A-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[COPY15]], implicit $exec
+ ; GFx90A-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
+ ; GFx90A-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
+ ; GFx90A-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
%ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret double %ret
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
index 790cd8ef9eccf..81e8230e79259 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX908 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
@@ -165,25 +165,41 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %va
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
@@ -210,26 +226,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
@@ -256,26 +289,43 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret void
}
@@ -304,28 +354,47 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
; GFX908-NEXT: S_ENDPGM 0
;
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: S_ENDPGM 0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: S_ENDPGM 0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: S_ENDPGM 0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
index 89e1a4be4e16c..803c2fec3d4d5 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX90A_GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX90A %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GFX90A_GFX942,GFX942 %s
define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
; GFX90A_GFX942-LABEL: name: buffer_atomic_fadd_v2f16_offset_rtn
@@ -86,106 +86,179 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
ret <2 x half> %ret
}
define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
- ; GFX90A_GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
- ; GFX90A_GFX942: bb.0 (%ir-block.0):
- ; GFX90A_GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
- ; GFX90A_GFX942-NEXT: {{ $}}
- ; GFX90A_GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
- ; GFX90A_GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
- ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
- ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
- ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
- ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
- ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
- ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
- ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
- ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
- ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ; GFX90A-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+ ; GFX90A: bb.0 (%ir-block.0):
+ ; GFX90A-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX90A-NEXT: {{ $}}
+ ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX90A-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX90A-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX90A-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX90A-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX90A-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX90A-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX90A-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+ ; GFX90A-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX90A-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
+ ; GFX90A-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
+ ; GFX90A-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
+ ; GFX90A-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX90A-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX90A-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+ ; GFX90A-NEXT: SI_RETURN_TO_EPILOG $vgpr0
+ ;
+ ; GFX942-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_rtn
+ ; GFX942: bb.0 (%ir-block.0):
+ ; GFX942-NEXT: liveins: $vgpr0, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr1, $vgpr2, $sgpr4
+ ; GFX942-NEXT: {{ $}}
+ ; GFX942-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
+ ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; GFX942-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr3
+ ; GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr2
+ ; GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr1
+ ; GFX942-NEXT: [[COPY6:%[0-9]+]]:sgpr_32 = COPY $sgpr0
+ ; GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1
+ ; GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[REG_SEQUENCE1]], %subreg.sub0_sub1, killed [[REG_SEQUENCE]], %subreg.sub2_sub3
+ ; GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+ ; GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
+ ; GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
%ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
ret <2 x half> %ret
}
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index b5e579b78a59c..2ae48f441fded 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -110,10 +110,8 @@ define amdgpu_kernel void @zero_init_kernel() {
;
; GFX942-LABEL: zero_init_kernel:
; GFX942: ; %bb.0:
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:48
@@ -304,10 +302,8 @@ define void @zero_init_foo() {
; GFX942-LABEL: zero_init_foo:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
@@ -1180,10 +1176,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() {
; GFX942: ; %bb.0:
; GFX942-NEXT: scratch_load_dword v0, off, off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:256
@@ -1397,10 +1391,8 @@ define void @zero_init_small_offset_foo() {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
@@ -2420,10 +2412,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() {
; GFX942: ; %bb.0:
; GFX942-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: s_movk_i32 s0, 0x4004
@@ -2656,10 +2646,8 @@ define void @zero_init_large_offset_foo() {
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: scratch_load_dword v0, off, s32 offset:4 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
-; GFX942-NEXT: s_mov_b32 s0, 0
-; GFX942-NEXT: s_mov_b32 s1, s0
-; GFX942-NEXT: s_mov_b32 s2, s0
-; GFX942-NEXT: s_mov_b32 s3, s0
+; GFX942-NEXT: s_mov_b64 s[0:1], 0
+; GFX942-NEXT: s_mov_b64 s[2:3], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GFX942-NEXT: s_add_i32 s0, s32, 0x4004
@@ -4139,8 +4127,7 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
; GFX942-LABEL: store_load_i64_aligned:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v2, 15
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 15
; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
@@ -4250,8 +4237,7 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX942-LABEL: store_load_i64_unaligned:
; GFX942: ; %bb.0: ; %bb
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v2, 15
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], 15
; GFX942-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
@@ -5010,10 +4996,8 @@ define amdgpu_ps void @large_offset() {
;
; GFX942-LABEL: large_offset:
; GFX942: ; %bb.0: ; %bb
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:3024 sc0 sc1
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:3024 sc0 sc1
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 53d940e1e6c1a..c00ec6f3a506e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fmaximum3_f64__multi_use(double %a, double %b, double
; GFX9-NEXT: v_max_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.maximum.f64(double %a, double %b)
%max1 = call double @llvm.maximum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index d1d0c0dcdb7e0..fd7563dbfc098 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -4010,8 +4010,8 @@ define <2 x double> @v_no_fminimum3_f64__multi_use(double %a, double %b, double
; GFX9-NEXT: v_min_f64 v[2:3], v[0:1], v[4:5]
; GFX9-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX9-NEXT: s_nop 1
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v8, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
%max0 = call double @llvm.minimum.f64(double %a, double %b)
%max1 = call double @llvm.minimum.f64(double %max0, double %c)
diff --git a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
index 0dfeb3454dad5..cfbad1fa6742e 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-to-bf16.ll
@@ -726,8 +726,8 @@ define amdgpu_ps <3 x i32> @s_uitofp_v3i1_to_v3bf16(<3 x i1> inreg %num) {
; GFX9-LABEL: s_uitofp_v3i1_to_v3bf16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s4, 1, s2
-; GFX9-NEXT: s_and_b32 s2, 1, s1
-; GFX9-NEXT: s_bitcmp1_b32 s0, 0
+; GFX9-NEXT: s_and_b32 s2, 1, s0
+; GFX9-NEXT: s_bitcmp1_b32 s1, 0
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-NEXT: s_cmp_eq_u32 s2, 1
; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
@@ -760,8 +760,8 @@ define amdgpu_ps <3 x i32> @s_uitofp_v3i1_to_v3bf16(<3 x i1> inreg %num) {
; GFX9-NEXT: v_readfirstlane_b32 s2, v0
; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; GFX9-NEXT: v_readfirstlane_b32 s1, v1
-; GFX9-NEXT: v_readfirstlane_b32 s0, v2
+; GFX9-NEXT: v_readfirstlane_b32 s0, v1
+; GFX9-NEXT: v_readfirstlane_b32 s1, v2
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_uitofp_v3i1_to_v3bf16:
@@ -1178,8 +1178,8 @@ define amdgpu_ps <4 x i32> @s_uitofp_v4i1_to_v4bf16(<4 x i1> inreg %num) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s6, 1, s3
; GFX9-NEXT: s_and_b32 s4, 1, s2
-; GFX9-NEXT: s_and_b32 s2, 1, s1
-; GFX9-NEXT: s_bitcmp1_b32 s0, 0
+; GFX9-NEXT: s_and_b32 s2, 1, s0
+; GFX9-NEXT: s_bitcmp1_b32 s1, 0
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-NEXT: s_cmp_eq_u32 s2, 1
; GFX9-NEXT: s_cselect_b64 s[2:3], -1, 0
@@ -1223,8 +1223,8 @@ define amdgpu_ps <4 x i32> @s_uitofp_v4i1_to_v4bf16(<4 x i1> inreg %num) {
; GFX9-NEXT: v_readfirstlane_b32 s2, v1
; GFX9-NEXT: v_cndmask_b32_e32 v3, v5, v4, vcc
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3
-; GFX9-NEXT: v_readfirstlane_b32 s1, v2
-; GFX9-NEXT: v_readfirstlane_b32 s0, v3
+; GFX9-NEXT: v_readfirstlane_b32 s0, v2
+; GFX9-NEXT: v_readfirstlane_b32 s1, v3
; GFX9-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: s_uitofp_v4i1_to_v4bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 5d5dc01439fe4..92c2255588456 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -164,20 +164,20 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_1:
-; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0x3ff00000
-; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX90A-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_1:
+; GFX90A: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0x3ff00000
+; GFX90A: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
-; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
+; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 1.0{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
@@ -189,20 +189,20 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_neg1:
-; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0xbff00000
-; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX90A-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_neg1:
+; GFX90A: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0xbff00000
+; GFX90A: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
-; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
+; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], -1.0{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
@@ -214,19 +214,19 @@ bb:
ret void
}
-; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64:
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
-; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 0
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX90A-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64:
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
+; GFX90A: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 0
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
-; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
+; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 64{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
; GCN: global_store_dwordx4
; GCN: global_store_dwordx4
@@ -239,14 +239,26 @@ bb:
}
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bits:
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
-; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 64
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
+; GFX90A: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 64
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+
+; GFX942: s_mov_b32 s[[S_LOW_BITS_0:[0-9]+]], 0{{$}}
+; GFX942: s_mov_b32 s[[S_HIGH_BITS_0:[0-9]+]], 64{{$}}
+; GFX942: v_mov_b64_e32 v{{\[}}[[V_LOW_BITS_0:[0-9]+]]:[[V_HIGH_BITS_0:[0-9]+]]{{\]}}, s{{\[}}[[S_LOW_BITS_0]]:[[S_HIGH_BITS_0]]{{\]$}}
+; GFX942: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], v[[V_LOW_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], v[[V_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
@@ -263,14 +275,23 @@ bb:
}
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low:
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
+
+; GFX942: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
+; GFX942: v_accvgpr_mov_b32 a[[A_HIGH_BITS_0:[0-9]+]], a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
@@ -287,14 +308,23 @@ bb:
}
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low:
-; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 1.0
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
-; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 1.0
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX90A: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
+
+; GFX942: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 1.0{{$}}
+; GFX942: v_accvgpr_mov_b32 a[[A_HIGH_BITS_0:[0-9]+]], a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
+; GFX942: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
@@ -323,8 +353,21 @@ bb:
}
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit:
-; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
+; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
+; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
+
+; GFX942: s_mov_b32 s[[S_LOW_BITS_0:[0-9]+]], 0{{$}}
+; GFX942: s_mov_b32 s[[S_HIGH_BITS_0:[0-9]+]], 0x405ec000{{$}}
+; GFX942: v_mov_b64_e32 v{{\[}}[[V_LOW_BITS_0:[0-9]+]]:[[V_HIGH_BITS_0:[0-9]+]]{{\]}}, s{{\[}}[[S_LOW_BITS_0]]:[[S_HIGH_BITS_0]]{{\]$}}
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_LOW_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_LOW_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_HIGH_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_HIGH_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_HIGH_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_HIGH_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_LOW_BITS_0]]
+; GFX942: v_accvgpr_write_b32 a{{[0-9]+}}, v[[V_LOW_BITS_0]]
+
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
; GCN: global_store_dwordx4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
index f971080e02c5b..090707eda3ca5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maximum.f64.ll
@@ -561,17 +561,17 @@ define <2 x double> @v_maximum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX950-LABEL: v_maximum_v2f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64:
@@ -630,12 +630,19 @@ define <2 x double> @v_maximum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan:
; GFX10: ; %bb.0:
@@ -711,17 +718,17 @@ define <2 x double> @v_maximum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX950-LABEL: v_maximum_v2f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_max_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nsz:
@@ -780,12 +787,19 @@ define <2 x double> @v_maximum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v2f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v2f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v2f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1008,22 +1022,22 @@ define <3 x double> @v_maximum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX950-LABEL: v_maximum_v3f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64:
@@ -1092,13 +1106,21 @@ define <3 x double> @v_maximum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan:
; GFX10: ; %bb.0:
@@ -1189,22 +1211,22 @@ define <3 x double> @v_maximum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX950-LABEL: v_maximum_v3f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_max_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_max_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nsz:
@@ -1273,13 +1295,21 @@ define <3 x double> @v_maximum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v3f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v3f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v3f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1382,27 +1412,27 @@ define <4 x double> @v_maximum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX950-LABEL: v_maximum_v4f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64:
@@ -1482,14 +1512,23 @@ define <4 x double> @v_maximum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan:
; GFX10: ; %bb.0:
@@ -1595,27 +1634,27 @@ define <4 x double> @v_maximum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX950-LABEL: v_maximum_v4f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_max_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_max_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nsz:
@@ -1695,14 +1734,23 @@ define <4 x double> @v_maximum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximum_v4f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximum_v4f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximum_v4f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1864,43 +1912,43 @@ define <8 x double> @v_maximum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT: v_max_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT: v_max_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT: v_max_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[32:33], v[12:13], v[28:29]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_max_f64 v[34:35], v[10:11], v[26:27]
+; GFX950-NEXT: v_max_f64 v[36:37], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
; GFX950-NEXT: v_max_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT: v_max_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT: v_max_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT: v_max_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_max_f64 v[48:49], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: v_max_f64 v[50:51], v[2:3], v[18:19]
+; GFX950-NEXT: v_max_f64 v[52:53], v[0:1], v[16:17]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v37, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_max_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v8f64:
@@ -2371,152 +2419,144 @@ define <16 x double> @v_maximum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116
; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32
; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28
-; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104
-; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_max_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000
; GFX950-NEXT: s_waitcnt vmcnt(23)
-; GFX950-NEXT: v_max_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(21)
-; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[46:47], v[28:29], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41]
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[58:59], v[6:7], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57]
+; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[56:57], v[4:5], v[44:45]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45]
+; GFX950-NEXT: scratch_load_dword v5, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[44:45], v[2:3], v[42:43]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43]
+; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_max_f64 v[42:43], v[0:1], v[52:53]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53]
; GFX950-NEXT: s_waitcnt vmcnt(19)
-; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(17)
-; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(15)
-; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(13)
-; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(18)
+; GFX950-NEXT: v_max_f64 v[52:53], v[26:27], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v47, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49]
+; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(16)
+; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[50:51]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(11)
-; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(9)
-; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
-; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(14)
+; GFX950-NEXT: v_max_f64 v[0:1], v[22:23], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(12)
+; GFX950-NEXT: v_max_f64 v[0:1], v[20:21], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(10)
+; GFX950-NEXT: v_max_f64 v[0:1], v[18:19], v[34:35]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(8)
+; GFX950-NEXT: v_max_f64 v[0:1], v[16:17], v[32:33]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(6)
-; GFX950-NEXT: v_max_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
-; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT: v_max_f64 v[0:1], v[14:15], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(4)
-; GFX950-NEXT: v_max_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[0:1], v[12:13], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(2)
-; GFX950-NEXT: v_max_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT: v_max_f64 v[0:1], v[10:11], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_max_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT: v_mov_b32_e32 v0, v58
-; GFX950-NEXT: v_mov_b32_e32 v1, v59
-; GFX950-NEXT: v_mov_b32_e32 v2, v60
+; GFX950-NEXT: v_max_f64 v[0:1], v[8:9], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v44, 0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7]
; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
index dfd67873c3b86..b119dd425463b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minimum.f64.ll
@@ -561,17 +561,17 @@ define <2 x double> @v_minimum_v2f64(<2 x double> %src0, <2 x double> %src1) {
; GFX950-LABEL: v_minimum_v2f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64:
@@ -630,12 +630,19 @@ define <2 x double> @v_minimum_v2f64__nnan(<2 x double> %src0, <2 x double> %src
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan:
; GFX10: ; %bb.0:
@@ -711,17 +718,17 @@ define <2 x double> @v_minimum_v2f64__nsz(<2 x double> %src0, <2 x double> %src1
; GFX950-LABEL: v_minimum_v2f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[4:5]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
-; GFX950-NEXT: v_min_f64 v[4:5], v[2:3], v[6:7]
-; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v8, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
+; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[6:7]
+; GFX950-NEXT: v_mov_b32_e32 v10, 0x7ff80000
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[6:7], v[0:1], v[4:5]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[4:5]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v4, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v5, v8, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v7, v10, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nsz:
@@ -780,12 +787,19 @@ define <2 x double> @v_minimum_v2f64__nnan_nsz(<2 x double> %src0, <2 x double>
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v2f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v2f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v2f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v2f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1008,22 +1022,22 @@ define <3 x double> @v_minimum_v3f64(<3 x double> %src0, <3 x double> %src1) {
; GFX950-LABEL: v_minimum_v3f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64:
@@ -1092,13 +1106,21 @@ define <3 x double> @v_minimum_v3f64__nnan(<3 x double> %src0, <3 x double> %src
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan:
; GFX10: ; %bb.0:
@@ -1189,22 +1211,22 @@ define <3 x double> @v_minimum_v3f64__nsz(<3 x double> %src0, <3 x double> %src1
; GFX950-LABEL: v_minimum_v3f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[12:13], v[0:1], v[6:7]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
-; GFX950-NEXT: v_min_f64 v[6:7], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[12:13], v[4:5], v[10:11]
+; GFX950-NEXT: v_mov_b32_e32 v14, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[2:3], v[8:9]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v12, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v13, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v12, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[8:9], v[0:1], v[6:7]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v11, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[6:7]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v7, v12, vcc
-; GFX950-NEXT: v_min_f64 v[6:7], v[4:5], v[10:11]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v6, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v7, v12, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v9, v14, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v8, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nsz:
@@ -1273,13 +1295,21 @@ define <3 x double> @v_minimum_v3f64__nnan_nsz(<3 x double> %src0, <3 x double>
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v3f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v3f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v3f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v3f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1382,27 +1412,27 @@ define <4 x double> @v_minimum_v4f64(<4 x double> %src0, <4 x double> %src1) {
; GFX950-LABEL: v_minimum_v4f64:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64:
@@ -1482,14 +1512,23 @@ define <4 x double> @v_minimum_v4f64__nnan(<4 x double> %src0, <4 x double> %src
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan:
; GFX10: ; %bb.0:
@@ -1595,27 +1634,27 @@ define <4 x double> @v_minimum_v4f64__nsz(<4 x double> %src0, <4 x double> %src1
; GFX950-LABEL: v_minimum_v4f64__nsz:
; GFX950: ; %bb.0:
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[0:1], v[8:9]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
-; GFX950-NEXT: v_min_f64 v[8:9], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[16:17], v[6:7], v[14:15]
+; GFX950-NEXT: v_mov_b32_e32 v18, 0x7ff80000
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[14:15], v[4:5], v[12:13]
; GFX950-NEXT: s_nop 0
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v16, 0, vcc
-; GFX950-NEXT: v_mov_b32_e32 v16, 0x7ff80000
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v17, v16, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[4:5], v[12:13]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v17, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v16, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[12:13], v[2:3], v[10:11]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v15, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v14, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[10:11], v[0:1], v[8:9]
+; GFX950-NEXT: s_nop 0
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v13, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v12, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[8:9]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v9, v16, vcc
-; GFX950-NEXT: v_min_f64 v[8:9], v[6:7], v[14:15]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[14:15]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v8, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v9, v16, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v11, v18, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nsz:
@@ -1695,14 +1734,23 @@ define <4 x double> @v_minimum_v4f64__nnan_nsz(<4 x double> %src0, <4 x double>
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimum_v4f64__nnan_nsz:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimum_v4f64__nnan_nsz:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimum_v4f64__nnan_nsz:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v4f64__nnan_nsz:
; GFX10: ; %bb.0:
@@ -1864,43 +1912,43 @@ define <8 x double> @v_minimum_v8f64(<8 x double> %src0, <8 x double> %src1) {
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX950-NEXT: scratch_load_dword v31, off, s32
; GFX950-NEXT: v_mov_b32_e32 v54, 0x7ff80000
-; GFX950-NEXT: v_min_f64 v[32:33], v[0:1], v[16:17]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
-; GFX950-NEXT: v_min_f64 v[34:35], v[2:3], v[18:19]
-; GFX950-NEXT: v_min_f64 v[36:37], v[4:5], v[20:21]
-; GFX950-NEXT: v_cndmask_b32_e64 v0, v32, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v1, v33, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[32:33], v[12:13], v[28:29]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_min_f64 v[34:35], v[10:11], v[26:27]
+; GFX950-NEXT: v_min_f64 v[36:37], v[8:9], v[24:25]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v33, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v32, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
; GFX950-NEXT: v_min_f64 v[38:39], v[6:7], v[22:23]
-; GFX950-NEXT: v_min_f64 v[48:49], v[8:9], v[24:25]
-; GFX950-NEXT: v_cndmask_b32_e64 v2, v34, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v3, v35, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
-; GFX950-NEXT: v_min_f64 v[50:51], v[10:11], v[26:27]
-; GFX950-NEXT: v_min_f64 v[52:53], v[12:13], v[28:29]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v36, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v5, v37, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
-; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_min_f64 v[48:49], v[4:5], v[20:21]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v35, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v34, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[24:25]
+; GFX950-NEXT: v_min_f64 v[50:51], v[2:3], v[18:19]
+; GFX950-NEXT: v_min_f64 v[52:53], v[0:1], v[16:17]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v37, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v36, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[6:7], v[22:23]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v48, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v49, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[26:27]
+; GFX950-NEXT: v_cndmask_b32_e32 v7, v39, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v38, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[4:5], v[20:21]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v50, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v51, v54, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[28:29]
+; GFX950-NEXT: v_cndmask_b32_e32 v5, v49, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v48, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[18:19]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v52, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e32 v3, v51, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v50, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[16:17]
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: v_min_f64 v[16:17], v[14:15], v[30:31]
+; GFX950-NEXT: v_cndmask_b32_e32 v1, v53, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v52, 0, vcc
; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[30:31]
; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e32 v15, v17, v54, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v16, 0, vcc
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v8f64:
@@ -2371,152 +2419,144 @@ define <16 x double> @v_minimum_v16f64(<16 x double> %src0, <16 x double> %src1)
; GFX950-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:8
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:4
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:16
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:12
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:24
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:120
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:116
; GFX950-NEXT: scratch_load_dword v57, off, s32 offset:32
; GFX950-NEXT: scratch_load_dword v56, off, s32 offset:28
-; GFX950-NEXT: scratch_load_dword v47, off, s32 offset:40
-; GFX950-NEXT: scratch_load_dword v46, off, s32 offset:36
-; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:48
-; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:44
-; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:56
-; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:52
-; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
-; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
-; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:72
-; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:68
-; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:80
-; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:76
-; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:88
-; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:84
-; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:96
-; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v45, off, s32 offset:24
+; GFX950-NEXT: scratch_load_dword v44, off, s32 offset:20
+; GFX950-NEXT: scratch_load_dword v43, off, s32 offset:16
+; GFX950-NEXT: scratch_load_dword v42, off, s32 offset:12
+; GFX950-NEXT: scratch_load_dword v53, off, s32 offset:8
+; GFX950-NEXT: scratch_load_dword v52, off, s32 offset:4
+; GFX950-NEXT: scratch_load_dword v49, off, s32 offset:112
; GFX950-NEXT: scratch_load_dword v31, off, s32
-; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:104
-; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v55, off, s32 offset:128
+; GFX950-NEXT: scratch_load_dword v54, off, s32 offset:124
+; GFX950-NEXT: scratch_load_dword v48, off, s32 offset:108
+; GFX950-NEXT: scratch_load_dword v51, off, s32 offset:104
+; GFX950-NEXT: scratch_load_dword v50, off, s32 offset:100
+; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:96
+; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:92
+; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:88
+; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:84
+; GFX950-NEXT: scratch_load_dword v35, off, s32 offset:80
+; GFX950-NEXT: scratch_load_dword v34, off, s32 offset:76
+; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:72
+; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:68
+; GFX950-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[58:59], v[0:1], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[32:33]
-; GFX950-NEXT: scratch_load_dword v33, off, s32 offset:112
-; GFX950-NEXT: scratch_load_dword v32, off, s32 offset:108
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[60:61], v[2:3], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[2:3], v[36:37]
-; GFX950-NEXT: scratch_load_dword v37, off, s32 offset:120
-; GFX950-NEXT: scratch_load_dword v36, off, s32 offset:116
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[62:63], v[4:5], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[38:39]
-; GFX950-NEXT: scratch_load_dword v39, off, s32 offset:128
-; GFX950-NEXT: scratch_load_dword v38, off, s32 offset:124
-; GFX950-NEXT: v_mov_b32_e32 v2, 0x7ff80000
-; GFX950-NEXT: s_waitcnt vmcnt(25)
-; GFX950-NEXT: v_min_f64 v[0:1], v[6:7], v[56:57]
-; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[6:7], v[56:57]
+; GFX950-NEXT: v_mov_b32_e32 v60, 0x7ff80000
; GFX950-NEXT: s_waitcnt vmcnt(23)
-; GFX950-NEXT: v_min_f64 v[56:57], v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v58, v58, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v59, v59, v2, vcc
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[46:47]
-; GFX950-NEXT: v_cndmask_b32_e64 v6, v0, 0, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v7, v1, v2, s[4:5]
-; GFX950-NEXT: v_cndmask_b32_e64 v8, v56, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v9, v57, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(21)
-; GFX950-NEXT: v_min_f64 v[0:1], v[10:11], v[44:45]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[44:45]
-; GFX950-NEXT: v_cndmask_b32_e64 v60, v60, 0, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v3, v61, v2, s[0:1]
-; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[46:47], v[28:29], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[40:41]
+; GFX950-NEXT: scratch_load_dword v41, off, s32 offset:64
+; GFX950-NEXT: scratch_load_dword v40, off, s32 offset:60
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[58:59], v[6:7], v[56:57]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[0:1], v[6:7], v[56:57]
+; GFX950-NEXT: scratch_load_dword v7, off, s32 offset:56
+; GFX950-NEXT: scratch_load_dword v6, off, s32 offset:52
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[56:57], v[4:5], v[44:45]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[2:3], v[4:5], v[44:45]
+; GFX950-NEXT: scratch_load_dword v5, off, s32 offset:48
+; GFX950-NEXT: scratch_load_dword v4, off, s32 offset:44
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[44:45], v[2:3], v[42:43]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[4:5], v[2:3], v[42:43]
+; GFX950-NEXT: scratch_load_dword v3, off, s32 offset:40
+; GFX950-NEXT: scratch_load_dword v2, off, s32 offset:36
+; GFX950-NEXT: s_waitcnt vmcnt(23)
+; GFX950-NEXT: v_min_f64 v[42:43], v[0:1], v[52:53]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[6:7], v[0:1], v[52:53]
; GFX950-NEXT: s_waitcnt vmcnt(19)
-; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[42:43]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[42:43]
-; GFX950-NEXT: v_cndmask_b32_e64 v4, v62, 0, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v5, v63, v2, s[2:3]
-; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(17)
-; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
-; GFX950-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(15)
-; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[54:55]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[54:55]
-; GFX950-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(13)
-; GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[52:53]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[52:53]
-; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[54:55]
+; GFX950-NEXT: s_waitcnt vmcnt(18)
+; GFX950-NEXT: v_min_f64 v[52:53], v[26:27], v[48:49]
+; GFX950-NEXT: v_cmp_u_f64_e64 s[8:9], v[30:31], v[54:55]
+; GFX950-NEXT: v_cndmask_b32_e32 v29, v47, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v28, v46, 0, vcc
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[48:49]
+; GFX950-NEXT: v_cndmask_b32_e64 v31, v1, v60, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, s[8:9]
+; GFX950-NEXT: v_cndmask_b32_e32 v27, v53, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v26, v52, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(16)
+; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[50:51]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(11)
-; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[50:51]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[50:51]
; GFX950-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v2, vcc
-; GFX950-NEXT: s_waitcnt vmcnt(9)
-; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[48:49]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[48:49]
-; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
-; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(14)
+; GFX950-NEXT: v_min_f64 v[0:1], v[22:23], v[38:39]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[22:23], v[38:39]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v60, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v22, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v23, v1, v2, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(12)
+; GFX950-NEXT: v_min_f64 v[0:1], v[20:21], v[36:37]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[20:21], v[36:37]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v21, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v20, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(10)
+; GFX950-NEXT: v_min_f64 v[0:1], v[18:19], v[34:35]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[18:19], v[34:35]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v19, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
+; GFX950-NEXT: s_waitcnt vmcnt(8)
+; GFX950-NEXT: v_min_f64 v[0:1], v[16:17], v[32:33]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[16:17], v[32:33]
+; GFX950-NEXT: s_nop 1
+; GFX950-NEXT: v_cndmask_b32_e32 v17, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v16, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(6)
-; GFX950-NEXT: v_min_f64 v[0:1], v[24:25], v[34:35]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[24:25], v[34:35]
-; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
+; GFX950-NEXT: v_min_f64 v[0:1], v[14:15], v[40:41]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[14:15], v[40:41]
; GFX950-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse
-; GFX950-NEXT: v_cndmask_b32_e64 v24, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v25, v1, v2, vcc
; GFX950-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse
+; GFX950-NEXT: v_cndmask_b32_e32 v15, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v14, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(4)
-; GFX950-NEXT: v_min_f64 v[0:1], v[26:27], v[32:33]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[26:27], v[32:33]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v26, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v27, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[0:1], v[12:13], v[6:7]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[12:13], v[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v7, v59, v60, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e64 v6, v58, 0, s[0:1]
+; GFX950-NEXT: v_cndmask_b32_e32 v13, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v12, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(2)
-; GFX950-NEXT: v_min_f64 v[0:1], v[28:29], v[36:37]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[28:29], v[36:37]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v28, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v29, v1, v2, vcc
+; GFX950-NEXT: v_min_f64 v[0:1], v[10:11], v[4:5]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[10:11], v[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v5, v57, v60, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v4, v56, 0, s[2:3]
+; GFX950-NEXT: v_cndmask_b32_e32 v11, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v10, v0, 0, vcc
; GFX950-NEXT: s_waitcnt vmcnt(0)
-; GFX950-NEXT: v_min_f64 v[0:1], v[30:31], v[38:39]
-; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[30:31], v[38:39]
-; GFX950-NEXT: s_nop 1
-; GFX950-NEXT: v_cndmask_b32_e64 v30, v0, 0, vcc
-; GFX950-NEXT: v_cndmask_b32_e32 v31, v1, v2, vcc
-; GFX950-NEXT: v_mov_b32_e32 v0, v58
-; GFX950-NEXT: v_mov_b32_e32 v1, v59
-; GFX950-NEXT: v_mov_b32_e32 v2, v60
+; GFX950-NEXT: v_min_f64 v[0:1], v[8:9], v[2:3]
+; GFX950-NEXT: v_cmp_u_f64_e32 vcc, v[8:9], v[2:3]
+; GFX950-NEXT: v_cndmask_b32_e64 v3, v45, v60, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e64 v2, v44, 0, s[4:5]
+; GFX950-NEXT: v_cndmask_b32_e32 v9, v1, v60, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v8, v0, 0, vcc
+; GFX950-NEXT: v_cndmask_b32_e64 v1, v43, v60, s[6:7]
+; GFX950-NEXT: v_cndmask_b32_e64 v0, v42, 0, s[6:7]
; GFX950-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse
; GFX950-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse
+; GFX950-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse
; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimum_v16f64:
diff --git a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
index 3b855a56a5abb..88032317165bb 100644
--- a/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
+++ b/llvm/test/CodeGen/AMDGPU/masked-load-vectortypes.ll
@@ -7,11 +7,11 @@ define <2 x i32> @uniform_masked_load_ptr1_mask_v2i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB0_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx2 v[0:1], v0, s[0:1]
; GFX942-NEXT: .LBB0_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -30,13 +30,12 @@ define <4 x i32> @uniform_masked_load_ptr1_mask_v4i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB1_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
+; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
; GFX942-NEXT: .LBB1_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
@@ -80,20 +79,16 @@ define <8 x i32> @uniform_masked_load_ptr1_mask_v8i32(ptr addrspace(1) inreg noc
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: v_and_b32_e32 v0, 1, v0
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v0
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX942-NEXT: s_cbranch_execz .LBB3_2
; GFX942-NEXT: ; %bb.1: ; %cond.load
-; GFX942-NEXT: global_load_dwordx4 v[4:7], v0, s[0:1] offset:16
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: global_load_dwordx4 v[0:3], v0, s[0:1]
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_load_dwordx4 v[4:7], v8, s[0:1] offset:16
+; GFX942-NEXT: global_load_dwordx4 v[0:3], v8, s[0:1]
; GFX942-NEXT: .LBB3_2:
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX942-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/maximumnum.ll b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
index 4f73e8e9c1883..64c602f81cb23 100644
--- a/llvm/test/CodeGen/AMDGPU/maximumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/maximumnum.ll
@@ -5,11 +5,11 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
@@ -62,21 +62,37 @@ define half @v_maximumnum_f16(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16:
; GFX10-SDAG: ; %bb.0:
@@ -211,11 +227,17 @@ define half @v_maximumnum_f16_nnan(half %x, half %y) {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_nnan:
; GFX10: ; %bb.0:
@@ -283,12 +305,19 @@ define half @v_maximumnum_f16_1.0(half %x) {
; GFX8-NEXT: v_max_f16_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_1.0:
; GFX10: ; %bb.0:
@@ -373,21 +402,37 @@ define float @v_maximumnum_f32(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32:
; GFX10-SDAG: ; %bb.0:
@@ -461,11 +506,17 @@ define float @v_maximumnum_f32_nnan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_nnan:
; GFX10: ; %bb.0:
@@ -525,21 +576,37 @@ define double @v_maximumnum_f64(double %x, double %y) {
; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f64:
; GFX10-SDAG: ; %bb.0:
@@ -617,11 +684,17 @@ define double @v_maximumnum_f64_nnan(double %x, double %y) {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_nnan:
; GFX10: ; %bb.0:
@@ -663,12 +736,19 @@ define float @v_maximumnum_f32_1.0(float %x) {
; GFX8-NEXT: v_max_f32_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_1.0:
; GFX10: ; %bb.0:
@@ -717,13 +797,21 @@ define float @v_maximumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_rhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_rhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_rhs_not_snan:
; GFX10: ; %bb.0:
@@ -774,13 +862,21 @@ define float @v_maximumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_lhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_lhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_lhs_not_snan:
; GFX10: ; %bb.0:
@@ -831,13 +927,21 @@ define float @v_maximumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_both_operands_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_both_operands_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_both_operands_not_snan:
; GFX10: ; %bb.0:
@@ -887,12 +991,19 @@ define double @v_maximumnum_f64_1.0(double %x) {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_1.0:
; GFX10: ; %bb.0:
@@ -2190,21 +2301,37 @@ define float @v_maximumnum_f32_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2303,21 +2430,37 @@ define float @v_maximumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2417,21 +2560,37 @@ define float @v_maximumnum_f32_fabs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2531,21 +2690,37 @@ define float @v_maximumnum_f32_fneg(float %x, float %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f32_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f32_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f32_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f32_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f32_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -2648,21 +2823,37 @@ define half @v_maximumnum_f16_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2808,21 +2999,37 @@ define half @v_maximumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2969,21 +3176,37 @@ define half @v_maximumnum_f16_fabs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -3130,21 +3353,37 @@ define half @v_maximumnum_f16_fneg(half %x, half %y) {
; GFX8-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f16_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f16_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f16_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f16_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f16_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3288,21 +3527,37 @@ define double @v_maximumnum_f64_fneg(double %x, double %y) {
; GFX8-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_f64_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_f64_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_f64_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_f64_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_f64_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3564,11 +3819,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_nnan:
; GFX10: ; %bb.0:
@@ -3663,16 +3924,16 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_maximumnum_v3f16:
; GFX900-GISEL: ; %bb.0:
@@ -3685,6 +3946,17 @@ define <3 x half> @v_maximumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v2
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX950-GISEL-LABEL: v_maximumnum_v3f16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3834,19 +4106,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f16_nnan:
; GFX10: ; %bb.0:
@@ -4157,12 +4443,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f16_nnan:
; GFX10: ; %bb.0:
@@ -6691,27 +6984,49 @@ define <2 x float> @v_maximumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v2f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v2f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v2f32:
; GFX10-SDAG: ; %bb.0:
@@ -6797,12 +7112,19 @@ define <2 x float> @v_maximumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v2
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f32_nnan:
; GFX10: ; %bb.0:
@@ -6887,33 +7209,61 @@ define <3 x float> @v_maximumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v3f32:
; GFX10-SDAG: ; %bb.0:
@@ -7015,13 +7365,21 @@ define <3 x float> @v_maximumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v3f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v3
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f32_nnan:
; GFX10: ; %bb.0:
@@ -7121,39 +7479,73 @@ define <4 x float> @v_maximumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v4f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_maximumnum_v4f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_maximumnum_v4f32:
; GFX10-SDAG: ; %bb.0:
@@ -7267,14 +7659,23 @@ define <4 x float> @v_maximumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-NEXT: v_max_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_max_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_max_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v4
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v5
+; GFX950-NEXT: v_max_f32_e32 v2, v2, v6
+; GFX950-NEXT: v_max_f32_e32 v3, v3, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f32_nnan:
; GFX10: ; %bb.0:
@@ -7376,12 +7777,12 @@ define <2 x double> @v_maximumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v2f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[4:5]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v2f64:
@@ -7491,12 +7892,26 @@ define <2 x double> @v_maximumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v2f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v2f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f64_nnan:
; GFX10: ; %bb.0:
@@ -7614,15 +8029,15 @@ define <3 x double> @v_maximumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v3f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[6:7]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v3f64:
@@ -7755,13 +8170,29 @@ define <3 x double> @v_maximumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v3f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v3f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f64_nnan:
; GFX10: ; %bb.0:
@@ -7900,18 +8331,18 @@ define <4 x double> @v_maximumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX950-SDAG-LABEL: v_maximumnum_v4f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[8:9]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_maximumnum_v4f64:
@@ -8067,14 +8498,32 @@ define <4 x double> @v_maximumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v4f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_maximumnum_v4f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_max_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_max_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f64_nnan:
; GFX10: ; %bb.0:
@@ -8136,11 +8585,17 @@ define half @v_maximumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8241,11 +8696,17 @@ define half @v_maximumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_max_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f16_nan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f16_nan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f16_nan_no_ieee:
; GFX10: ; %bb.0:
@@ -8301,11 +8762,17 @@ define float @v_maximumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_no_ieee:
; GFX10: ; %bb.0:
@@ -8359,11 +8826,17 @@ define float @v_maximumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_max_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f32_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f32_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f32_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f32_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8403,11 +8876,17 @@ define double @v_maximumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_no_ieee:
; GFX10: ; %bb.0:
@@ -8463,11 +8942,17 @@ define double @v_maximumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_f64_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_f64_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_f64_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_f64_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8539,11 +9024,17 @@ define <2 x half> @v_maximumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8631,11 +9122,17 @@ define <2 x half> @v_maximumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v2f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8718,19 +9215,33 @@ define <3 x half> @v_maximumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v3f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8829,12 +9340,19 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_max_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_max_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_max_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_maximumnum_v4f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8865,6 +9383,3 @@ define <4 x half> @v_maximumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX900: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/minimumnum.ll b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
index 558006d2b6957..9c4a1ca797110 100644
--- a/llvm/test/CodeGen/AMDGPU/minimumnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimumnum.ll
@@ -5,11 +5,11 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX900,GFX900-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-SDAG,GFX950-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX9,GFX950,GFX9-GISEL,GFX950-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
@@ -62,21 +62,37 @@ define half @v_minimumnum_f16(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16:
; GFX10-SDAG: ; %bb.0:
@@ -211,11 +227,17 @@ define half @v_minimumnum_f16_nnan(half %x, half %y) {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nnan:
; GFX10: ; %bb.0:
@@ -283,12 +305,19 @@ define half @v_minimumnum_f16_1.0(half %x) {
; GFX8-NEXT: v_min_f16_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f16_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f16_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f16_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_1.0:
; GFX10: ; %bb.0:
@@ -373,21 +402,37 @@ define float @v_minimumnum_f32(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32:
; GFX10-SDAG: ; %bb.0:
@@ -461,11 +506,17 @@ define float @v_minimumnum_f32_nnan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan:
; GFX10: ; %bb.0:
@@ -525,21 +576,37 @@ define double @v_minimumnum_f64(double %x, double %y) {
; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f64:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f64:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f64:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f64:
; GFX10-SDAG: ; %bb.0:
@@ -617,11 +684,17 @@ define double @v_minimumnum_f64_nnan(double %x, double %y) {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan:
; GFX10: ; %bb.0:
@@ -663,12 +736,19 @@ define float @v_minimumnum_f32_1.0(float %x) {
; GFX8-NEXT: v_min_f32_e32 v0, 1.0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, 1.0, v0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f32_e32 v0, 1.0, v0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_1.0:
; GFX10: ; %bb.0:
@@ -717,13 +797,21 @@ define float @v_minimumnum_f32_rhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_rhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_rhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_rhs_not_snan:
; GFX10: ; %bb.0:
@@ -774,13 +862,21 @@ define float @v_minimumnum_f32_lhs_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_lhs_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_lhs_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_lhs_not_snan:
; GFX10: ; %bb.0:
@@ -831,13 +927,21 @@ define float @v_minimumnum_f32_both_operands_not_snan(float %x, float %y) {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_both_operands_not_snan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_both_operands_not_snan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_both_operands_not_snan:
; GFX10: ; %bb.0:
@@ -887,12 +991,19 @@ define double @v_minimumnum_f64_1.0(double %x) {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_1.0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_1.0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_1.0:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], 1.0
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_1.0:
; GFX10: ; %bb.0:
@@ -2015,21 +2126,37 @@ define float @v_minimumnum_f32_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2128,21 +2255,37 @@ define float @v_minimumnum_f32_fneg_fabs_rhs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2242,21 +2385,37 @@ define float @v_minimumnum_f32_fabs(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2356,21 +2515,37 @@ define float @v_minimumnum_f32_fneg(float %x, float %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f32_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f32_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f32_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f32_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f32_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -2473,21 +2648,37 @@ define half @v_minimumnum_f16_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2633,21 +2824,37 @@ define half @v_minimumnum_f16_fneg_fabs_rhs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-SDAG-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -|v1|, -|v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg_fabs_rhs:
; GFX10-SDAG: ; %bb.0:
@@ -2794,21 +3001,37 @@ define half @v_minimumnum_f16_fabs(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fabs:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fabs:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fabs:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fabs:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, |v0|, |v0|
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, |v1|, |v1|
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fabs:
; GFX10-SDAG: ; %bb.0:
@@ -2955,21 +3178,37 @@ define half @v_minimumnum_f16_fneg(half %x, half %y) {
; GFX8-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f16_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f16_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
-; GFX9-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
-; GFX9-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX900-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX900-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f16_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-SDAG-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-SDAG-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f16_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
+; GFX950-GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
+; GFX950-GISEL-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f16_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3113,21 +3352,37 @@ define double @v_minimumnum_f64_fneg(double %x, double %y) {
; GFX8-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_f64_fneg:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_f64_fneg:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
-; GFX9-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
-; GFX9-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX900-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX900-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_f64_fneg:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-SDAG-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_f64_fneg:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GFX950-GISEL-NEXT: v_max_f64 v[2:3], -v[2:3], -v[2:3]
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_f64_fneg:
; GFX10-SDAG: ; %bb.0:
@@ -3389,11 +3644,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan(<2 x half> %x, <2 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan:
; GFX10: ; %bb.0:
@@ -3488,16 +3749,16 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
-; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
-; GFX9-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
-; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX900-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX900-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX900-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX900-GISEL-LABEL: v_minimumnum_v3f16:
; GFX900-GISEL: ; %bb.0:
@@ -3510,6 +3771,17 @@ define <3 x half> @v_minimumnum_v3f16(<3 x half> %x, <3 x half> %y) {
; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v2
; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_max_f16 v3, v3, v3
+; GFX950-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
+; GFX950-SDAG-NEXT: v_pk_max_f16 v2, v2, v2
+; GFX950-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
; GFX950-GISEL-LABEL: v_minimumnum_v3f16:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -3659,19 +3931,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan(<3 x half> %x, <3 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan:
; GFX10: ; %bb.0:
@@ -3982,12 +4268,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan(<4 x half> %x, <4 x half> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f16_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f16_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f16_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan:
; GFX10: ; %bb.0:
@@ -6516,27 +6809,49 @@ define <2 x float> @v_minimumnum_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v2f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v2f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v3, v3
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v2
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v2f32:
; GFX10-SDAG: ; %bb.0:
@@ -6622,12 +6937,19 @@ define <2 x float> @v_minimumnum_v2f32_nnan(<2 x float> %x, <2 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v2
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v2
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f32_nnan:
; GFX10: ; %bb.0:
@@ -6712,33 +7034,61 @@ define <3 x float> @v_minimumnum_v3f32(<3 x float> %x, <3 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
-; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v4, v4
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v5, v5
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v3f32:
; GFX10-SDAG: ; %bb.0:
@@ -6840,13 +7190,21 @@ define <3 x float> @v_minimumnum_v3f32_nnan(<3 x float> %x, <3 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v2, v2, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v3
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v5
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v3f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v3
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-NEXT: v_min_f32_e32 v2, v2, v5
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f32_nnan:
; GFX10: ; %bb.0:
@@ -6946,39 +7304,73 @@ define <4 x float> @v_minimumnum_v4f32(<4 x float> %x, <4 x float> %y) {
; GFX8-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v4f32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX9-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_minimumnum_v4f32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
-; GFX9-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
-; GFX9-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
-; GFX9-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
-; GFX9-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
-; GFX9-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
-; GFX9-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX900-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX900-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX900-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX900-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX900-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX900-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX900-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f32:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-SDAG-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-SDAG-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-SDAG-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-SDAG-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-SDAG-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX950-SDAG-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-SDAG-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-SDAG-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f32:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_max_f32_e32 v0, v0, v0
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v4, v4
+; GFX950-GISEL-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v1, v1, v1
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v5, v5
+; GFX950-GISEL-NEXT: v_min_f32_e32 v1, v1, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v2, v2, v2
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v6, v6
+; GFX950-GISEL-NEXT: v_min_f32_e32 v2, v2, v4
+; GFX950-GISEL-NEXT: v_max_f32_e32 v3, v3, v3
+; GFX950-GISEL-NEXT: v_max_f32_e32 v4, v7, v7
+; GFX950-GISEL-NEXT: v_min_f32_e32 v3, v3, v4
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_minimumnum_v4f32:
; GFX10-SDAG: ; %bb.0:
@@ -7092,14 +7484,23 @@ define <4 x float> @v_minimumnum_v4f32_nnan(<4 x float> %x, <4 x float> %y) {
; GFX8-NEXT: v_min_f32_e32 v3, v3, v7
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f32_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v4
-; GFX9-NEXT: v_min_f32_e32 v1, v1, v5
-; GFX9-NEXT: v_min_f32_e32 v2, v2, v6
-; GFX9-NEXT: v_min_f32_e32 v3, v3, v7
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f32_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX900-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX900-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX900-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f32_nnan:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v4
+; GFX950-NEXT: v_min_f32_e32 v1, v1, v5
+; GFX950-NEXT: v_min_f32_e32 v2, v2, v6
+; GFX950-NEXT: v_min_f32_e32 v3, v3, v7
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f32_nnan:
; GFX10: ; %bb.0:
@@ -7201,12 +7602,12 @@ define <2 x double> @v_minimumnum_v2f64(<2 x double> %x, <2 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v2f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v2f64:
@@ -7316,12 +7717,26 @@ define <2 x double> @v_minimumnum_v2f64_nnan(<2 x double> %x, <2 x double> %y) {
; GFX8-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v2f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v2f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[4:5]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f64_nnan:
; GFX10: ; %bb.0:
@@ -7439,15 +7854,15 @@ define <3 x double> @v_minimumnum_v3f64(<3 x double> %x, <3 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v3f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[8:9], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[6:7]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v3f64:
@@ -7580,13 +7995,29 @@ define <3 x double> @v_minimumnum_v3f64_nnan(<3 x double> %x, <3 x double> %y) {
; GFX8-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v3f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v3f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v3f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[6:7]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[10:11]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f64_nnan:
; GFX10: ; %bb.0:
@@ -7725,18 +8156,18 @@ define <4 x double> @v_minimumnum_v4f64(<4 x double> %x, <4 x double> %y) {
; GFX950-SDAG-LABEL: v_minimumnum_v4f64:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_max_f64 v[14:15], v[14:15], v[14:15]
+; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
+; GFX950-SDAG-NEXT: v_max_f64 v[12:13], v[12:13], v[12:13]
+; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
+; GFX950-SDAG-NEXT: v_max_f64 v[10:11], v[10:11], v[10:11]
+; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[8:9], v[8:9]
; GFX950-SDAG-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
+; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[10:11], v[10:11]
-; GFX950-SDAG-NEXT: v_max_f64 v[2:3], v[2:3], v[2:3]
-; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[12:13], v[12:13]
-; GFX950-SDAG-NEXT: v_max_f64 v[4:5], v[4:5], v[4:5]
-; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[8:9]
-; GFX950-SDAG-NEXT: v_max_f64 v[8:9], v[14:15], v[14:15]
-; GFX950-SDAG-NEXT: v_max_f64 v[6:7], v[6:7], v[6:7]
-; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[8:9]
; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX950-GISEL-LABEL: v_minimumnum_v4f64:
@@ -7892,14 +8323,32 @@ define <4 x double> @v_minimumnum_v4f64_nnan(<4 x double> %x, <4 x double> %y) {
; GFX8-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f64_nnan:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
-; GFX9-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
-; GFX9-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
-; GFX9-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f64_nnan:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX900-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX900-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX900-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v4f64_nnan:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-SDAG-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-SDAG-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-SDAG-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-GISEL-LABEL: v_minimumnum_v4f64_nnan:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_min_f64 v[0:1], v[0:1], v[8:9]
+; GFX950-GISEL-NEXT: v_min_f64 v[2:3], v[2:3], v[10:11]
+; GFX950-GISEL-NEXT: v_min_f64 v[4:5], v[4:5], v[12:13]
+; GFX950-GISEL-NEXT: v_min_f64 v[6:7], v[6:7], v[14:15]
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f64_nnan:
; GFX10: ; %bb.0:
@@ -7961,11 +8410,17 @@ define half @v_minimumnum_f16_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8066,11 +8521,17 @@ define half @v_minimumnum_f16_nan_no_ieee(half %x, half %y) #0 {
; GFX8-NEXT: v_min_f16_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f16_nan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f16_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f16_nan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f16_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f16_nan_no_ieee:
; GFX10: ; %bb.0:
@@ -8126,11 +8587,17 @@ define float @v_minimumnum_f32_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_no_ieee:
; GFX10: ; %bb.0:
@@ -8184,11 +8651,17 @@ define float @v_minimumnum_f32_nnan_no_ieee(float %x, float %y) #0 {
; GFX8-NEXT: v_min_f32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f32_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f32_e32 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f32_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f32_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f32_e32 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f32_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8228,11 +8701,17 @@ define double @v_minimumnum_f64_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_no_ieee:
; GFX10: ; %bb.0:
@@ -8288,11 +8767,17 @@ define double @v_minimumnum_f64_nnan_no_ieee(double %x, double %y) #0 {
; GFX8-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_f64_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_f64_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_f64_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3]
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_f64_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8364,11 +8849,17 @@ define <2 x half> @v_minimumnum_v2f16_no_ieee(<2 x half> %x, <2 x half> %y) #0 {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_no_ieee:
; GFX10: ; %bb.0:
@@ -8456,11 +8947,17 @@ define <2 x half> @v_minimumnum_v2f16_nnan_no_ieee(<2 x half> %x, <2 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v1
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v1
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v2f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8543,19 +9040,33 @@ define <3 x half> @v_minimumnum_v3f16_nnan_no_ieee(<3 x half> %x, <3 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX900-SDAG: ; %bb.0:
+; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX900-GISEL: ; %bb.0:
+; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-SDAG-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX950-SDAG: ; %bb.0:
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-SDAG-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-SDAG-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX950-GISEL-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
+; GFX950-GISEL: ; %bb.0:
+; GFX950-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-GISEL-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v3f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8654,12 +9165,19 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_min_f16 v0, v0, v2
-; GFX9-NEXT: v_pk_min_f16 v1, v1, v3
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX900-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: v_pk_min_f16 v0, v0, v2
+; GFX950-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_minimumnum_v4f16_nnan_no_ieee:
; GFX10: ; %bb.0:
@@ -8690,6 +9208,3 @@ define <4 x half> @v_minimumnum_v4f16_nnan_no_ieee(<4 x half> %x, <4 x half> %y)
}
attributes #0 = { "amdgpu-ieee"="false" }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX900: {{.*}}
-; GFX950: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index bef38c1a65ef8..a338bcbd3ba5c 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -181,52 +181,52 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[0:1] offset:16
; GFX900-NEXT: s_endpgm
;
-; PACKED-SDAG-LABEL: fadd_v32_vs:
-; PACKED-SDAG: ; %bb.0:
-; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
-; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[40:41]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[2:3], v[2:3], s[42:43]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(6)
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[6:7], v[6:7], s[38:39]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(5)
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[8:9], v[8:9], s[48:49]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[10:11], v[10:11], s[50:51]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(4)
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[16:17], v[16:17], s[44:45]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[18:19], v[18:19], s[46:47]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[28:29], v[28:29], s[16:17]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[30:31], v[30:31], s[18:19]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[20:21], v[20:21], s[12:13]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[22:23], v[22:23], s[14:15]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[14:15], v[14:15], s[10:11]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[24:25], v[24:25], s[20:21]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[26:27], v[26:27], s[22:23]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[4:5], v[4:5], s[36:37]
-; PACKED-SDAG-NEXT: v_pk_add_f32 v[12:13], v[12:13], s[8:9]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
-; PACKED-SDAG-NEXT: s_endpgm
+; GFX90A-SDAG-LABEL: fadd_v32_vs:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
+; GFX90A-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[40:41]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[2:3], v[2:3], s[42:43]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[6:7], v[6:7], s[38:39]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[8:9], v[8:9], s[48:49]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[10:11], v[10:11], s[50:51]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[16:17], v[16:17], s[44:45]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[18:19], v[18:19], s[46:47]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[28:29], v[28:29], s[16:17]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[30:31], v[30:31], s[18:19]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[20:21], v[20:21], s[12:13]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[22:23], v[22:23], s[14:15]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[14:15], v[14:15], s[10:11]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[24:25], v[24:25], s[20:21]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[26:27], v[26:27], s[22:23]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[4:5], v[4:5], s[36:37]
+; GFX90A-SDAG-NEXT: v_pk_add_f32 v[12:13], v[12:13], s[8:9]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
+; GFX90A-SDAG-NEXT: s_endpgm
;
; PACKED-GISEL-LABEL: fadd_v32_vs:
; PACKED-GISEL: ; %bb.0:
@@ -277,6 +277,56 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
; PACKED-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: fadd_v32_vs:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v36, 7, v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v36, s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[12:15], v36, s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[24:27], v36, s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[28:31], v36, s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[32:35], v36, s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[16:19], v36, s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[8:11], v36, s[0:1] offset:16
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[4:7], v36, s[0:1]
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xe4
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xa4
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[2:3], v[2:3], s[50:51]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[22:23], v[14:15], s[46:47]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[20:21], v[12:13], s[44:45]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[0:1], v[0:1], s[48:49]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[14:15], v[26:27], s[42:43]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[12:13], v[24:25], s[40:41]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[26:27], v[30:31], s[38:39]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[24:25], v[28:29], s[36:37]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[30:31], v[34:35], s[22:23]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[28:29], v[32:33], s[20:21]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[18:19], v[18:19], s[18:19]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[16:17], v[16:17], s[16:17]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[10:11], v[10:11], s[14:15]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[8:9], v[8:9], s[12:13]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[6:7], v[6:7], s[10:11]
+; GFX942-SDAG-NEXT: v_pk_add_f32 v[4:5], v[4:5], s[8:9]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[20:23], s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[24:27], s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[28:31], s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:16
+; GFX942-SDAG-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
%load = load <32 x float>, ptr addrspace(1) %gep, align 128
@@ -959,52 +1009,52 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[0:1] offset:16
; GFX900-NEXT: s_endpgm
;
-; PACKED-SDAG-LABEL: fmul_v32_vs:
-; PACKED-SDAG: ; %bb.0:
-; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
-; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[40:41]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[2:3], v[2:3], s[42:43]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(6)
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[6:7], v[6:7], s[38:39]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(5)
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[8:9], v[8:9], s[48:49]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[10:11], v[10:11], s[50:51]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(4)
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[16:17], v[16:17], s[44:45]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[18:19], v[18:19], s[46:47]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[28:29], v[28:29], s[16:17]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[30:31], v[30:31], s[18:19]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[20:21], v[20:21], s[12:13]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[22:23], v[22:23], s[14:15]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[14:15], v[14:15], s[10:11]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[24:25], v[24:25], s[20:21]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[26:27], v[26:27], s[22:23]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[4:5], v[4:5], s[36:37]
-; PACKED-SDAG-NEXT: v_pk_mul_f32 v[12:13], v[12:13], s[8:9]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
-; PACKED-SDAG-NEXT: s_endpgm
+; GFX90A-SDAG-LABEL: fmul_v32_vs:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
+; GFX90A-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[40:41]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[2:3], v[2:3], s[42:43]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[6:7], v[6:7], s[38:39]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[8:9], v[8:9], s[48:49]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[10:11], v[10:11], s[50:51]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[16:17], v[16:17], s[44:45]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[18:19], v[18:19], s[46:47]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[28:29], v[28:29], s[16:17]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[30:31], v[30:31], s[18:19]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[20:21], v[20:21], s[12:13]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[22:23], v[22:23], s[14:15]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[14:15], v[14:15], s[10:11]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[24:25], v[24:25], s[20:21]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[26:27], v[26:27], s[22:23]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[4:5], v[4:5], s[36:37]
+; GFX90A-SDAG-NEXT: v_pk_mul_f32 v[12:13], v[12:13], s[8:9]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
+; GFX90A-SDAG-NEXT: s_endpgm
;
; PACKED-GISEL-LABEL: fmul_v32_vs:
; PACKED-GISEL: ; %bb.0:
@@ -1055,6 +1105,56 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
; PACKED-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: fmul_v32_vs:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v36, 7, v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v36, s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[12:15], v36, s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[24:27], v36, s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[28:31], v36, s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[32:35], v36, s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[16:19], v36, s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[8:11], v36, s[0:1] offset:16
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[4:7], v36, s[0:1]
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xe4
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xa4
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[2:3], v[2:3], s[50:51]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[22:23], v[14:15], s[46:47]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[20:21], v[12:13], s[44:45]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[0:1], v[0:1], s[48:49]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[14:15], v[26:27], s[42:43]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[12:13], v[24:25], s[40:41]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[26:27], v[30:31], s[38:39]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[24:25], v[28:29], s[36:37]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[30:31], v[34:35], s[22:23]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[28:29], v[32:33], s[20:21]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[18:19], v[18:19], s[18:19]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[16:17], v[16:17], s[16:17]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[10:11], v[10:11], s[14:15]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[8:9], v[8:9], s[12:13]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[6:7], v[6:7], s[10:11]
+; GFX942-SDAG-NEXT: v_pk_mul_f32 v[4:5], v[4:5], s[8:9]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[20:23], s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[0:3], s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[24:27], s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[12:15], s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[16:19], s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[28:31], s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[4:7], s[0:1]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v36, v[8:11], s[0:1] offset:16
+; GFX942-SDAG-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
%load = load <32 x float>, ptr addrspace(1) %gep, align 128
@@ -1466,52 +1566,52 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; GFX900-NEXT: global_store_dwordx4 v0, v[1:4], s[0:1] offset:16
; GFX900-NEXT: s_endpgm
;
-; PACKED-SDAG-LABEL: fma_v32_vs:
-; PACKED-SDAG: ; %bb.0:
-; PACKED-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; PACKED-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; PACKED-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
-; PACKED-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
-; PACKED-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[40:41], s[40:41]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[2:3], v[2:3], s[42:43], s[42:43]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(6)
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[6:7], v[6:7], s[38:39], s[38:39]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(5)
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[8:9], v[8:9], s[48:49], s[48:49]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[10:11], v[10:11], s[50:51], s[50:51]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(4)
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[16:17], v[16:17], s[44:45], s[44:45]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[18:19], v[18:19], s[46:47], s[46:47]
-; PACKED-SDAG-NEXT: s_waitcnt vmcnt(0)
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], s[16:17], s[16:17]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[30:31], v[30:31], s[18:19], s[18:19]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], s[12:13], s[12:13]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[22:23], v[22:23], s[14:15], s[14:15]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[14:15], v[14:15], s[10:11], s[10:11]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[24:25], v[24:25], s[20:21], s[20:21]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[26:27], v[26:27], s[22:23], s[22:23]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], s[36:37], s[36:37]
-; PACKED-SDAG-NEXT: v_pk_fma_f32 v[12:13], v[12:13], s[8:9], s[8:9]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
-; PACKED-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
-; PACKED-SDAG-NEXT: s_endpgm
+; GFX90A-SDAG-LABEL: fma_v32_vs:
+; GFX90A-SDAG: ; %bb.0:
+; GFX90A-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX90A-SDAG-NEXT: v_lshlrev_b32_e32 v32, 7, v0
+; GFX90A-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[0:3], v32, s[0:1] offset:16
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[4:7], v32, s[0:1]
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[8:11], v32, s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[16:19], v32, s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[20:23], v32, s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[12:15], v32, s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[24:27], v32, s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_load_dwordx4 v[28:31], v32, s[0:1] offset:96
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xa4
+; GFX90A-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xe4
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[40:41], s[40:41]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[2:3], v[2:3], s[42:43], s[42:43]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[6:7], v[6:7], s[38:39], s[38:39]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[8:9], v[8:9], s[48:49], s[48:49]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[10:11], v[10:11], s[50:51], s[50:51]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[16:17], v[16:17], s[44:45], s[44:45]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[18:19], v[18:19], s[46:47], s[46:47]
+; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[28:29], s[16:17], s[16:17]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[30:31], v[30:31], s[18:19], s[18:19]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], s[12:13], s[12:13]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[22:23], v[22:23], s[14:15], s[14:15]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[14:15], v[14:15], s[10:11], s[10:11]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[24:25], v[24:25], s[20:21], s[20:21]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[26:27], v[26:27], s[22:23], s[22:23]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], s[36:37], s[36:37]
+; GFX90A-SDAG-NEXT: v_pk_fma_f32 v[12:13], v[12:13], s[8:9], s[8:9]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:96
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:112
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:64
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:48
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1]
+; GFX90A-SDAG-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] offset:16
+; GFX90A-SDAG-NEXT: s_endpgm
;
; PACKED-GISEL-LABEL: fma_v32_vs:
; PACKED-GISEL: ; %bb.0:
@@ -1562,6 +1662,56 @@ define amdgpu_kernel void @fma_v32_vs(ptr addrspace(1) %a, <32 x float> %x) {
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
; PACKED-GISEL-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
; PACKED-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-LABEL: fma_v32_vs:
+; GFX942-SDAG: ; %bb.0:
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX942-SDAG-NEXT: v_lshlrev_b32_e32 v24, 7, v0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[12:15], v24, s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[26:29], v24, s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[30:33], v24, s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[34:37], v24, s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[20:23], v24, s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[8:11], v24, s[0:1] offset:16
+; GFX942-SDAG-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1]
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0xe4
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0xa4
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(7) lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[2:3], v[2:3], s[50:51], s[50:51]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(6)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[18:19], v[14:15], s[46:47], s[46:47]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[16:17], v[12:13], s[44:45], s[44:45]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[0:1], v[0:1], s[48:49], s[48:49]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(5)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[14:15], v[28:29], s[42:43], s[42:43]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[12:13], v[26:27], s[40:41], s[40:41]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(4)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[28:29], v[32:33], s[38:39], s[38:39]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[26:27], v[30:31], s[36:37], s[36:37]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(3)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[32:33], v[36:37], s[22:23], s[22:23]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[30:31], v[34:35], s[20:21], s[20:21]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(2)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[22:23], v[22:23], s[18:19], s[18:19]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[20:21], v[20:21], s[16:17], s[16:17]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(1)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[10:11], v[10:11], s[14:15], s[14:15]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[8:9], v[8:9], s[12:13], s[12:13]
+; GFX942-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[6:7], v[6:7], s[10:11], s[10:11]
+; GFX942-SDAG-NEXT: v_pk_fma_f32 v[4:5], v[4:5], s[8:9], s[8:9]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:96
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[0:3], s[0:1] offset:112
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[26:29], s[0:1] offset:64
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:80
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:32
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[30:33], s[0:1] offset:48
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1]
+; GFX942-SDAG-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] offset:16
+; GFX942-SDAG-NEXT: s_endpgm
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %a, i32 %id
%load = load <32 x float>, ptr addrspace(1) %gep, align 128
@@ -2408,6 +2558,3 @@ declare i32 @llvm.amdgcn.workitem.id.x()
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX90A-SDAG: {{.*}}
-; GFX942-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
index 41fe0d48ec819..39544dd7af50c 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs.ll
@@ -476,10 +476,10 @@ define amdgpu_kernel void @v3f32_preload_arg(ptr addrspace(1) nocapture inreg %o
; GFX942-NEXT: .p2align 8
; GFX942-NEXT: ; %bb.2:
; GFX942-NEXT: .LBB12_0:
-; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: v_mov_b32_e32 v0, s6
; GFX942-NEXT: v_mov_b32_e32 v1, s7
; GFX942-NEXT: v_mov_b32_e32 v2, s8
+; GFX942-NEXT: v_mov_b32_e32 v3, 0
; GFX942-NEXT: global_store_dwordx3 v3, v[0:2], s[2:3]
; GFX942-NEXT: s_endpgm
;
@@ -990,10 +990,8 @@ define amdgpu_kernel void @fp128_kernel_preload_arg(ptr addrspace(1) inreg %out,
; GFX942-NEXT: ; %bb.2:
; GFX942-NEXT: .LBB25_0:
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, s6
-; GFX942-NEXT: v_mov_b32_e32 v1, s7
-; GFX942-NEXT: v_mov_b32_e32 v2, s8
-; GFX942-NEXT: v_mov_b32_e32 v3, s9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], s[8:9]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3]
; GFX942-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
index 51dc9a51ec9d0..3fd7f1cb481a4 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v2i64.ll
@@ -88,8 +88,7 @@ define void @v_shuffle_v2i64_v2i64__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -144,8 +143,7 @@ define void @v_shuffle_v2i64_v2i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -204,10 +202,8 @@ define void @v_shuffle_v2i64_v2i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -262,8 +258,7 @@ define void @v_shuffle_v2i64_v2i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -309,8 +304,7 @@ define void @v_shuffle_v2i64_v2i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -355,8 +349,7 @@ define void @v_shuffle_v2i64_v2i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -401,8 +394,7 @@ define void @v_shuffle_v2i64_v2i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -446,8 +438,7 @@ define void @v_shuffle_v2i64_v2i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -492,8 +483,7 @@ define void @v_shuffle_v2i64_v2i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -537,8 +527,7 @@ define void @v_shuffle_v2i64_v2i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -660,8 +649,7 @@ define void @v_shuffle_v2i64_v2i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -794,8 +782,7 @@ define void @v_shuffle_v2i64_v2i64__1_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -900,8 +887,7 @@ define void @v_shuffle_v2i64_v2i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -956,8 +942,7 @@ define void @v_shuffle_v2i64_v2i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1095,8 +1080,7 @@ define void @s_shuffle_v2i64_v2i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1154,8 +1138,7 @@ define void @s_shuffle_v2i64_v2i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1213,10 +1196,8 @@ define void @s_shuffle_v2i64_v2i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1270,8 +1251,7 @@ define void @s_shuffle_v2i64_v2i64__3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1320,10 +1300,8 @@ define void @s_shuffle_v2i64_v2i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1336,18 +1314,43 @@ define void @s_shuffle_v2i64_v2i64__3_2() {
}
define void @s_shuffle_v2i64_v2i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <2 x i32> <i32 3, i32 3>
@@ -1388,8 +1391,7 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1401,18 +1403,43 @@ define void @s_shuffle_v2i64_v2i64__u_0() {
}
define void @s_shuffle_v2i64_v2i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -1456,10 +1483,8 @@ define void @s_shuffle_v2i64_v2i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1503,8 +1528,7 @@ define void @s_shuffle_v2i64_v2i64__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1596,18 +1620,43 @@ define void @s_shuffle_v2i64_v2i64__0_1() {
}
define void @s_shuffle_v2i64_v2i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v2i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v2i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -1741,8 +1790,7 @@ define void @s_shuffle_v2i64_v2i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1850,8 +1898,7 @@ define void @s_shuffle_v2i64_v2i64__0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1905,8 +1952,7 @@ define void @s_shuffle_v2i64_v2i64__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
index bc8a56a30d8f9..f54d45b1367cc 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v3i64.ll
@@ -127,8 +127,7 @@ define void @v_shuffle_v2i64_v3i64__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -223,8 +222,7 @@ define void @v_shuffle_v2i64_v3i64__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -283,10 +281,8 @@ define void @v_shuffle_v2i64_v3i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -341,8 +337,7 @@ define void @v_shuffle_v2i64_v3i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -397,8 +392,7 @@ define void @v_shuffle_v2i64_v3i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -447,10 +441,8 @@ define void @v_shuffle_v2i64_v3i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -495,8 +487,7 @@ define void @v_shuffle_v2i64_v3i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -541,8 +532,7 @@ define void @v_shuffle_v2i64_v3i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -587,8 +577,7 @@ define void @v_shuffle_v2i64_v3i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -632,8 +621,7 @@ define void @v_shuffle_v2i64_v3i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -677,8 +665,7 @@ define void @v_shuffle_v2i64_v3i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -726,10 +713,8 @@ define void @v_shuffle_v2i64_v3i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -773,8 +758,7 @@ define void @v_shuffle_v2i64_v3i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -828,8 +812,7 @@ define void @v_shuffle_v2i64_v3i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,8 +935,7 @@ define void @v_shuffle_v2i64_v3i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -997,8 +979,7 @@ define void @v_shuffle_v2i64_v3i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1091,8 +1072,7 @@ define void @v_shuffle_v2i64_v3i64__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1176,8 +1156,7 @@ define void @v_shuffle_v2i64_v3i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1260,8 +1239,7 @@ define void @v_shuffle_v2i64_v3i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1332,7 @@ define void @v_shuffle_v2i64_v3i64__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1489,8 +1466,7 @@ define void @v_shuffle_v2i64_v3i64__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1545,8 +1521,7 @@ define void @v_shuffle_v2i64_v3i64__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1641,8 +1616,7 @@ define void @v_shuffle_v2i64_v3i64__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1697,8 +1671,7 @@ define void @v_shuffle_v2i64_v3i64__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1753,8 +1726,7 @@ define void @v_shuffle_v2i64_v3i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1839,8 +1811,7 @@ define void @v_shuffle_v2i64_v3i64__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1935,8 +1906,7 @@ define void @v_shuffle_v2i64_v3i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1991,8 +1961,7 @@ define void @v_shuffle_v2i64_v3i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2047,8 +2016,7 @@ define void @v_shuffle_v2i64_v3i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2093,8 +2061,7 @@ define void @v_shuffle_v2i64_v3i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2232,8 +2199,7 @@ define void @s_shuffle_v2i64_v3i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2273,8 +2239,7 @@ define void @s_shuffle_v2i64_v3i64__2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2332,8 +2297,7 @@ define void @s_shuffle_v2i64_v3i64__4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2374,8 +2338,7 @@ define void @s_shuffle_v2i64_v3i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2430,11 +2393,11 @@ define void @s_shuffle_v2i64_v3i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2484,8 +2447,7 @@ define void @s_shuffle_v2i64_v3i64__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2539,10 +2501,8 @@ define void @s_shuffle_v2i64_v3i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2587,10 +2547,8 @@ define void @s_shuffle_v2i64_v3i64__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2603,18 +2561,43 @@ define void @s_shuffle_v2i64_v3i64__5_3() {
}
define void @s_shuffle_v2i64_v3i64__5_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 5, i32 4>
@@ -2659,10 +2642,8 @@ define void @s_shuffle_v2i64_v3i64__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,8 +2688,7 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2720,18 +2700,43 @@ define void @s_shuffle_v2i64_v3i64__u_0() {
}
define void @s_shuffle_v2i64_v3i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -2775,10 +2780,8 @@ define void @s_shuffle_v2i64_v3i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2822,10 +2825,8 @@ define void @s_shuffle_v2i64_v3i64__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2869,8 +2870,7 @@ define void @s_shuffle_v2i64_v3i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2924,13 +2924,11 @@ define void @s_shuffle_v2i64_v3i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3023,18 +3021,43 @@ define void @s_shuffle_v2i64_v3i64__0_1() {
}
define void @s_shuffle_v2i64_v3i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3042,18 +3065,43 @@ define void @s_shuffle_v2i64_v3i64__1_1() {
}
define void @s_shuffle_v2i64_v3i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3142,8 +3190,7 @@ define void @s_shuffle_v2i64_v3i64__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3188,8 +3235,7 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3201,18 +3247,43 @@ define void @s_shuffle_v2i64_v3i64__u_2() {
}
define void @s_shuffle_v2i64_v3i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -3256,10 +3327,8 @@ define void @s_shuffle_v2i64_v3i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3307,10 +3376,8 @@ define void @s_shuffle_v2i64_v3i64__2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3354,8 +3421,7 @@ define void @s_shuffle_v2i64_v3i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3412,10 +3478,8 @@ define void @s_shuffle_v2i64_v3i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3514,8 +3578,7 @@ define void @s_shuffle_v2i64_v3i64__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3555,8 +3618,7 @@ define void @s_shuffle_v2i64_v3i64__2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3618,10 +3680,8 @@ define void @s_shuffle_v2i64_v3i64__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3716,8 +3776,7 @@ define void @s_shuffle_v2i64_v3i64__0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3771,8 +3830,7 @@ define void @s_shuffle_v2i64_v3i64__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3826,8 +3884,7 @@ define void @s_shuffle_v2i64_v3i64__2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3881,18 +3938,43 @@ define void @s_shuffle_v2i64_v3i64__3_4() {
}
define void @s_shuffle_v2i64_v3i64__4_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 4, i32 4>
@@ -3933,8 +4015,7 @@ define void @s_shuffle_v2i64_v3i64__u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3988,8 +4069,7 @@ define void @s_shuffle_v2i64_v3i64__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4047,10 +4127,8 @@ define void @s_shuffle_v2i64_v3i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4104,10 +4182,8 @@ define void @s_shuffle_v2i64_v3i64__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4120,18 +4196,43 @@ define void @s_shuffle_v2i64_v3i64__2_5() {
}
define void @s_shuffle_v2i64_v3i64__3_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v3i64__3_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v3i64__3_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <2 x i32> <i32 3, i32 5>
@@ -4176,10 +4277,8 @@ define void @s_shuffle_v2i64_v3i64__4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
index dd42a1dd44320..1c738b8f4f1d9 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v4i64.ll
@@ -166,8 +166,7 @@ define void @v_shuffle_v2i64_v4i64__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -302,8 +301,7 @@ define void @v_shuffle_v2i64_v4i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -362,10 +360,8 @@ define void @v_shuffle_v2i64_v4i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -420,8 +416,7 @@ define void @v_shuffle_v2i64_v4i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -476,8 +471,7 @@ define void @v_shuffle_v2i64_v4i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -532,8 +526,7 @@ define void @v_shuffle_v2i64_v4i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -582,10 +575,8 @@ define void @v_shuffle_v2i64_v4i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -630,8 +621,7 @@ define void @v_shuffle_v2i64_v4i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -676,8 +666,7 @@ define void @v_shuffle_v2i64_v4i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -722,8 +711,7 @@ define void @v_shuffle_v2i64_v4i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -768,8 +756,7 @@ define void @v_shuffle_v2i64_v4i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -813,8 +800,7 @@ define void @v_shuffle_v2i64_v4i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -858,8 +844,7 @@ define void @v_shuffle_v2i64_v4i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -903,8 +888,7 @@ define void @v_shuffle_v2i64_v4i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,10 +936,8 @@ define void @v_shuffle_v2i64_v4i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -999,8 +981,7 @@ define void @v_shuffle_v2i64_v4i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1054,8 +1035,7 @@ define void @v_shuffle_v2i64_v4i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1110,8 +1090,7 @@ define void @v_shuffle_v2i64_v4i64__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1234,8 +1213,7 @@ define void @v_shuffle_v2i64_v4i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1279,8 +1257,7 @@ define void @v_shuffle_v2i64_v4i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1324,8 +1301,7 @@ define void @v_shuffle_v2i64_v4i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1418,8 +1394,7 @@ define void @v_shuffle_v2i64_v4i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1474,8 +1449,7 @@ define void @v_shuffle_v2i64_v4i64__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1559,8 +1533,7 @@ define void @v_shuffle_v2i64_v4i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1643,8 +1616,7 @@ define void @v_shuffle_v2i64_v4i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1688,8 +1660,7 @@ define void @v_shuffle_v2i64_v4i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1782,8 +1753,7 @@ define void @v_shuffle_v2i64_v4i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1838,8 +1808,7 @@ define void @v_shuffle_v2i64_v4i64__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1923,8 +1892,7 @@ define void @v_shuffle_v2i64_v4i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1968,8 +1936,7 @@ define void @v_shuffle_v2i64_v4i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2052,8 +2019,7 @@ define void @v_shuffle_v2i64_v4i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2146,8 +2112,7 @@ define void @v_shuffle_v2i64_v4i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2202,8 +2167,7 @@ define void @v_shuffle_v2i64_v4i64__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2376,8 +2340,7 @@ define void @v_shuffle_v2i64_v4i64__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2432,8 +2395,7 @@ define void @v_shuffle_v2i64_v4i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2478,8 +2440,7 @@ define void @v_shuffle_v2i64_v4i64__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2574,8 +2535,7 @@ define void @v_shuffle_v2i64_v4i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2630,8 +2590,7 @@ define void @v_shuffle_v2i64_v4i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2686,8 +2645,7 @@ define void @v_shuffle_v2i64_v4i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2742,8 +2700,7 @@ define void @v_shuffle_v2i64_v4i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2828,8 +2785,7 @@ define void @v_shuffle_v2i64_v4i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2874,8 +2830,7 @@ define void @v_shuffle_v2i64_v4i64__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2970,8 +2925,7 @@ define void @v_shuffle_v2i64_v4i64__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3026,8 +2980,7 @@ define void @v_shuffle_v2i64_v4i64__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3082,8 +3035,7 @@ define void @v_shuffle_v2i64_v4i64__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3138,8 +3090,7 @@ define void @v_shuffle_v2i64_v4i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3184,8 +3135,7 @@ define void @v_shuffle_v2i64_v4i64__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3270,8 +3220,7 @@ define void @v_shuffle_v2i64_v4i64__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3366,8 +3315,7 @@ define void @v_shuffle_v2i64_v4i64__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3422,8 +3370,7 @@ define void @v_shuffle_v2i64_v4i64__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3478,8 +3425,7 @@ define void @v_shuffle_v2i64_v4i64__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3534,8 +3480,7 @@ define void @v_shuffle_v2i64_v4i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3580,8 +3525,7 @@ define void @v_shuffle_v2i64_v4i64__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3626,8 +3570,7 @@ define void @v_shuffle_v2i64_v4i64__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3765,8 +3708,7 @@ define void @s_shuffle_v2i64_v4i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3850,8 +3792,7 @@ define void @s_shuffle_v2i64_v4i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3850,7 @@ define void @s_shuffle_v2i64_v4i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3996,8 +3936,7 @@ define void @s_shuffle_v2i64_v4i64__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4056,10 +3995,8 @@ define void @s_shuffle_v2i64_v4i64__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4113,8 +4050,7 @@ define void @s_shuffle_v2i64_v4i64__7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4172,10 +4108,8 @@ define void @s_shuffle_v2i64_v4i64__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4230,8 +4164,7 @@ define void @s_shuffle_v2i64_v4i64__7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4280,10 +4213,8 @@ define void @s_shuffle_v2i64_v4i64__7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4296,18 +4227,43 @@ define void @s_shuffle_v2i64_v4i64__7_4() {
}
define void @s_shuffle_v2i64_v4i64__7_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 7, i32 5>
@@ -4352,10 +4308,8 @@ define void @s_shuffle_v2i64_v4i64__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4368,18 +4322,43 @@ define void @s_shuffle_v2i64_v4i64__7_6() {
}
define void @s_shuffle_v2i64_v4i64__7_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 7, i32 7>
@@ -4420,8 +4399,7 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4433,18 +4411,43 @@ define void @s_shuffle_v2i64_v4i64__u_0() {
}
define void @s_shuffle_v2i64_v4i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4488,10 +4491,8 @@ define void @s_shuffle_v2i64_v4i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,18 +4504,43 @@ define void @s_shuffle_v2i64_v4i64__1_0() {
}
define void @s_shuffle_v2i64_v4i64__2_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4558,10 +4584,8 @@ define void @s_shuffle_v2i64_v4i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4605,8 +4629,7 @@ define void @s_shuffle_v2i64_v4i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4664,10 +4687,8 @@ define void @s_shuffle_v2i64_v4i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4722,8 +4743,7 @@ define void @s_shuffle_v2i64_v4i64__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4816,18 +4836,43 @@ define void @s_shuffle_v2i64_v4i64__0_1() {
}
define void @s_shuffle_v2i64_v4i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4835,18 +4880,43 @@ define void @s_shuffle_v2i64_v4i64__1_1() {
}
define void @s_shuffle_v2i64_v4i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4854,18 +4924,43 @@ define void @s_shuffle_v2i64_v4i64__2_1() {
}
define void @s_shuffle_v2i64_v4i64__3_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -4954,8 +5049,7 @@ define void @s_shuffle_v2i64_v4i64__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5010,8 +5104,7 @@ define void @s_shuffle_v2i64_v4i64__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5056,8 +5149,7 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5069,18 +5161,43 @@ define void @s_shuffle_v2i64_v4i64__u_2() {
}
define void @s_shuffle_v2i64_v4i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5124,10 +5241,8 @@ define void @s_shuffle_v2i64_v4i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5139,18 +5254,43 @@ define void @s_shuffle_v2i64_v4i64__1_2() {
}
define void @s_shuffle_v2i64_v4i64__2_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5194,10 +5334,8 @@ define void @s_shuffle_v2i64_v4i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5241,8 +5379,7 @@ define void @s_shuffle_v2i64_v4i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5299,10 +5436,8 @@ define void @s_shuffle_v2i64_v4i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5357,8 +5492,7 @@ define void @s_shuffle_v2i64_v4i64__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5411,18 +5545,43 @@ define void @s_shuffle_v2i64_v4i64__u_3() {
}
define void @s_shuffle_v2i64_v4i64__0_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5430,18 +5589,43 @@ define void @s_shuffle_v2i64_v4i64__0_3() {
}
define void @s_shuffle_v2i64_v4i64__1_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5489,18 +5673,43 @@ define void @s_shuffle_v2i64_v4i64__2_3() {
}
define void @s_shuffle_v2i64_v4i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -5590,8 +5799,7 @@ define void @s_shuffle_v2i64_v4i64__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5646,8 +5854,7 @@ define void @s_shuffle_v2i64_v4i64__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5746,8 +5953,7 @@ define void @s_shuffle_v2i64_v4i64__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5831,8 +6037,7 @@ define void @s_shuffle_v2i64_v4i64__3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5894,10 +6099,8 @@ define void @s_shuffle_v2i64_v4i64__5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5910,18 +6113,43 @@ define void @s_shuffle_v2i64_v4i64__5_4() {
}
define void @s_shuffle_v2i64_v4i64__6_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 4>
@@ -6012,8 +6240,7 @@ define void @s_shuffle_v2i64_v4i64__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6067,8 +6294,7 @@ define void @s_shuffle_v2i64_v4i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6123,8 +6349,7 @@ define void @s_shuffle_v2i64_v4i64__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6178,8 +6403,7 @@ define void @s_shuffle_v2i64_v4i64__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6233,18 +6457,43 @@ define void @s_shuffle_v2i64_v4i64__4_5() {
}
define void @s_shuffle_v2i64_v4i64__5_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 5, i32 5>
@@ -6253,18 +6502,43 @@ define void @s_shuffle_v2i64_v4i64__5_5() {
}
define void @s_shuffle_v2i64_v4i64__6_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 5>
@@ -6305,8 +6579,7 @@ define void @s_shuffle_v2i64_v4i64__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6360,8 +6633,7 @@ define void @s_shuffle_v2i64_v4i64__0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6419,10 +6691,8 @@ define void @s_shuffle_v2i64_v4i64__1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6477,8 +6747,7 @@ define void @s_shuffle_v2i64_v4i64__2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6536,10 +6805,8 @@ define void @s_shuffle_v2i64_v4i64__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,18 +6819,43 @@ define void @s_shuffle_v2i64_v4i64__3_6() {
}
define void @s_shuffle_v2i64_v4i64__4_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 4, i32 6>
@@ -6608,10 +6900,8 @@ define void @s_shuffle_v2i64_v4i64__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6624,18 +6914,43 @@ define void @s_shuffle_v2i64_v4i64__5_6() {
}
define void @s_shuffle_v2i64_v4i64__6_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 6, i32 6>
@@ -6726,8 +7041,7 @@ define void @s_shuffle_v2i64_v4i64__0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6782,8 +7096,7 @@ define void @s_shuffle_v2i64_v4i64__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6838,8 +7151,7 @@ define void @s_shuffle_v2i64_v4i64__2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6894,8 +7206,7 @@ define void @s_shuffle_v2i64_v4i64__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6908,18 +7219,43 @@ define void @s_shuffle_v2i64_v4i64__3_7() {
}
define void @s_shuffle_v2i64_v4i64__4_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__4_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__4_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 4, i32 7>
@@ -6928,18 +7264,43 @@ define void @s_shuffle_v2i64_v4i64__4_7() {
}
define void @s_shuffle_v2i64_v4i64__5_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v4i64__5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v4i64__5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <2 x i32> <i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
index 7ee7c83e0122d..c8aac3a841c69 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2i64.v8i64.ll
@@ -322,8 +322,7 @@ define void @v_shuffle_v2i64_v8i64__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -618,8 +617,7 @@ define void @v_shuffle_v2i64_v8i64__15_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -678,10 +676,8 @@ define void @v_shuffle_v2i64_v8i64__15_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -736,8 +732,7 @@ define void @v_shuffle_v2i64_v8i64__15_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v18
-; GFX942-NEXT: v_mov_b32_e32 v1, v19
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v20, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -792,8 +787,7 @@ define void @v_shuffle_v2i64_v8i64__15_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v20
-; GFX942-NEXT: v_mov_b32_e32 v3, v21
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v22, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -848,8 +842,7 @@ define void @v_shuffle_v2i64_v8i64__15_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v22
-; GFX942-NEXT: v_mov_b32_e32 v5, v23
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v24, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -904,8 +897,7 @@ define void @v_shuffle_v2i64_v8i64__15_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v24
-; GFX942-NEXT: v_mov_b32_e32 v7, v25
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v26, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -960,8 +952,7 @@ define void @v_shuffle_v2i64_v8i64__15_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v26
-; GFX942-NEXT: v_mov_b32_e32 v9, v27
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v28, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1016,8 +1007,7 @@ define void @v_shuffle_v2i64_v8i64__15_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v28
-; GFX942-NEXT: v_mov_b32_e32 v11, v29
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[28:29]
; GFX942-NEXT: global_store_dwordx4 v30, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1072,8 +1062,7 @@ define void @v_shuffle_v2i64_v8i64__15_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[16:31]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v30
-; GFX942-NEXT: v_mov_b32_e32 v13, v31
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[30:31]
; GFX942-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1122,10 +1111,8 @@ define void @v_shuffle_v2i64_v8i64__15_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1170,8 +1157,7 @@ define void @v_shuffle_v2i64_v8i64__15_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1216,8 +1202,7 @@ define void @v_shuffle_v2i64_v8i64__15_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1262,8 +1247,7 @@ define void @v_shuffle_v2i64_v8i64__15_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1308,8 +1292,7 @@ define void @v_shuffle_v2i64_v8i64__15_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1337,7 @@ define void @v_shuffle_v2i64_v8i64__15_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1400,8 +1382,7 @@ define void @v_shuffle_v2i64_v8i64__15_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1446,8 +1427,7 @@ define void @v_shuffle_v2i64_v8i64__15_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1492,8 +1472,7 @@ define void @v_shuffle_v2i64_v8i64__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1537,8 +1516,7 @@ define void @v_shuffle_v2i64_v8i64__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1582,8 +1560,7 @@ define void @v_shuffle_v2i64_v8i64__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1627,8 +1604,7 @@ define void @v_shuffle_v2i64_v8i64__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1672,8 +1648,7 @@ define void @v_shuffle_v2i64_v8i64__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1717,8 +1692,7 @@ define void @v_shuffle_v2i64_v8i64__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1762,8 +1736,7 @@ define void @v_shuffle_v2i64_v8i64__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1807,8 +1780,7 @@ define void @v_shuffle_v2i64_v8i64__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1856,10 +1828,8 @@ define void @v_shuffle_v2i64_v8i64__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1903,8 +1873,7 @@ define void @v_shuffle_v2i64_v8i64__8_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1958,8 +1927,7 @@ define void @v_shuffle_v2i64_v8i64__9_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2014,8 +1982,7 @@ define void @v_shuffle_v2i64_v8i64__10_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2070,8 +2037,7 @@ define void @v_shuffle_v2i64_v8i64__11_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2126,8 +2092,7 @@ define void @v_shuffle_v2i64_v8i64__12_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2182,8 +2147,7 @@ define void @v_shuffle_v2i64_v8i64__13_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2238,8 +2202,7 @@ define void @v_shuffle_v2i64_v8i64__14_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v0
-; GFX942-NEXT: v_mov_b32_e32 v17, v1
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v18, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2362,8 +2325,7 @@ define void @v_shuffle_v2i64_v8i64__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2407,8 +2369,7 @@ define void @v_shuffle_v2i64_v8i64__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2452,8 +2413,7 @@ define void @v_shuffle_v2i64_v8i64__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2497,8 +2457,7 @@ define void @v_shuffle_v2i64_v8i64__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2542,8 +2501,7 @@ define void @v_shuffle_v2i64_v8i64__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2587,8 +2545,7 @@ define void @v_shuffle_v2i64_v8i64__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2632,8 +2589,7 @@ define void @v_shuffle_v2i64_v8i64__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2726,8 +2682,7 @@ define void @v_shuffle_v2i64_v8i64__9_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2782,8 +2737,7 @@ define void @v_shuffle_v2i64_v8i64__10_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2838,8 +2792,7 @@ define void @v_shuffle_v2i64_v8i64__11_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2894,8 +2847,7 @@ define void @v_shuffle_v2i64_v8i64__12_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2950,8 +2902,7 @@ define void @v_shuffle_v2i64_v8i64__13_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v2
-; GFX942-NEXT: v_mov_b32_e32 v17, v3
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3006,8 +2957,7 @@ define void @v_shuffle_v2i64_v8i64__14_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v2
-; GFX942-NEXT: v_mov_b32_e32 v19, v3
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v20, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3091,8 +3041,7 @@ define void @v_shuffle_v2i64_v8i64__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3175,8 +3124,7 @@ define void @v_shuffle_v2i64_v8i64__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3220,8 +3168,7 @@ define void @v_shuffle_v2i64_v8i64__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3265,8 +3212,7 @@ define void @v_shuffle_v2i64_v8i64__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3310,8 +3256,7 @@ define void @v_shuffle_v2i64_v8i64__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3355,8 +3300,7 @@ define void @v_shuffle_v2i64_v8i64__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3400,8 +3344,7 @@ define void @v_shuffle_v2i64_v8i64__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3494,8 +3437,7 @@ define void @v_shuffle_v2i64_v8i64__9_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3550,8 +3492,7 @@ define void @v_shuffle_v2i64_v8i64__10_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3606,8 +3547,7 @@ define void @v_shuffle_v2i64_v8i64__11_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3662,8 +3602,7 @@ define void @v_shuffle_v2i64_v8i64__12_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v4
-; GFX942-NEXT: v_mov_b32_e32 v17, v5
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3718,8 +3657,7 @@ define void @v_shuffle_v2i64_v8i64__13_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v4
-; GFX942-NEXT: v_mov_b32_e32 v19, v5
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3774,8 +3712,7 @@ define void @v_shuffle_v2i64_v8i64__14_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v4
-; GFX942-NEXT: v_mov_b32_e32 v21, v5
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v22, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3859,8 +3796,7 @@ define void @v_shuffle_v2i64_v8i64__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3904,8 +3840,7 @@ define void @v_shuffle_v2i64_v8i64__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3988,8 +3923,7 @@ define void @v_shuffle_v2i64_v8i64__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4033,8 +3967,7 @@ define void @v_shuffle_v2i64_v8i64__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4078,8 +4011,7 @@ define void @v_shuffle_v2i64_v8i64__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4123,8 +4055,7 @@ define void @v_shuffle_v2i64_v8i64__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4168,8 +4099,7 @@ define void @v_shuffle_v2i64_v8i64__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4262,8 +4192,7 @@ define void @v_shuffle_v2i64_v8i64__9_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4318,8 +4247,7 @@ define void @v_shuffle_v2i64_v8i64__10_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4374,8 +4302,7 @@ define void @v_shuffle_v2i64_v8i64__11_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v6
-; GFX942-NEXT: v_mov_b32_e32 v17, v7
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4430,8 +4357,7 @@ define void @v_shuffle_v2i64_v8i64__12_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v6
-; GFX942-NEXT: v_mov_b32_e32 v19, v7
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4486,8 +4412,7 @@ define void @v_shuffle_v2i64_v8i64__13_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v6
-; GFX942-NEXT: v_mov_b32_e32 v21, v7
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4542,8 +4467,7 @@ define void @v_shuffle_v2i64_v8i64__14_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v6
-; GFX942-NEXT: v_mov_b32_e32 v23, v7
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4627,8 +4551,7 @@ define void @v_shuffle_v2i64_v8i64__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4672,8 +4595,7 @@ define void @v_shuffle_v2i64_v8i64__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4717,8 +4639,7 @@ define void @v_shuffle_v2i64_v8i64__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4801,8 +4722,7 @@ define void @v_shuffle_v2i64_v8i64__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4846,8 +4766,7 @@ define void @v_shuffle_v2i64_v8i64__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v8
-; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4891,8 +4810,7 @@ define void @v_shuffle_v2i64_v8i64__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4936,8 +4854,7 @@ define void @v_shuffle_v2i64_v8i64__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5030,8 +4947,7 @@ define void @v_shuffle_v2i64_v8i64__9_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5086,8 +5002,7 @@ define void @v_shuffle_v2i64_v8i64__10_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v8
-; GFX942-NEXT: v_mov_b32_e32 v17, v9
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5142,8 +5057,7 @@ define void @v_shuffle_v2i64_v8i64__11_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v8
-; GFX942-NEXT: v_mov_b32_e32 v19, v9
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5198,8 +5112,7 @@ define void @v_shuffle_v2i64_v8i64__12_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v8
-; GFX942-NEXT: v_mov_b32_e32 v21, v9
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5254,8 +5167,7 @@ define void @v_shuffle_v2i64_v8i64__13_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v8
-; GFX942-NEXT: v_mov_b32_e32 v23, v9
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5310,8 +5222,7 @@ define void @v_shuffle_v2i64_v8i64__14_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v8
-; GFX942-NEXT: v_mov_b32_e32 v25, v9
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v26, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5395,8 +5306,7 @@ define void @v_shuffle_v2i64_v8i64__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5440,8 +5350,7 @@ define void @v_shuffle_v2i64_v8i64__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5485,8 +5394,7 @@ define void @v_shuffle_v2i64_v8i64__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5530,8 +5438,7 @@ define void @v_shuffle_v2i64_v8i64__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5614,8 +5521,7 @@ define void @v_shuffle_v2i64_v8i64__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5659,8 +5565,7 @@ define void @v_shuffle_v2i64_v8i64__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v10
-; GFX942-NEXT: v_mov_b32_e32 v15, v11
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5704,8 +5609,7 @@ define void @v_shuffle_v2i64_v8i64__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5798,8 +5702,7 @@ define void @v_shuffle_v2i64_v8i64__9_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v16, v10
-; GFX942-NEXT: v_mov_b32_e32 v17, v11
+; GFX942-NEXT: v_mov_b64_e32 v[16:17], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[14:17], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5854,8 +5757,7 @@ define void @v_shuffle_v2i64_v8i64__10_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v10
-; GFX942-NEXT: v_mov_b32_e32 v19, v11
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5910,8 +5812,7 @@ define void @v_shuffle_v2i64_v8i64__11_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v10
-; GFX942-NEXT: v_mov_b32_e32 v21, v11
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -5966,8 +5867,7 @@ define void @v_shuffle_v2i64_v8i64__12_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v10
-; GFX942-NEXT: v_mov_b32_e32 v23, v11
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6022,8 +5922,7 @@ define void @v_shuffle_v2i64_v8i64__13_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v10
-; GFX942-NEXT: v_mov_b32_e32 v25, v11
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6078,8 +5977,7 @@ define void @v_shuffle_v2i64_v8i64__14_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v10
-; GFX942-NEXT: v_mov_b32_e32 v27, v11
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v28, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6163,8 +6061,7 @@ define void @v_shuffle_v2i64_v8i64__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6208,8 +6105,7 @@ define void @v_shuffle_v2i64_v8i64__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6253,8 +6149,7 @@ define void @v_shuffle_v2i64_v8i64__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6298,8 +6193,7 @@ define void @v_shuffle_v2i64_v8i64__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6343,8 +6237,7 @@ define void @v_shuffle_v2i64_v8i64__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6427,8 +6320,7 @@ define void @v_shuffle_v2i64_v8i64__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6472,8 +6364,7 @@ define void @v_shuffle_v2i64_v8i64__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6566,8 +6457,7 @@ define void @v_shuffle_v2i64_v8i64__9_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v18, v12
-; GFX942-NEXT: v_mov_b32_e32 v19, v13
+; GFX942-NEXT: v_mov_b64_e32 v[18:19], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[16:19], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6622,8 +6512,7 @@ define void @v_shuffle_v2i64_v8i64__10_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v12
-; GFX942-NEXT: v_mov_b32_e32 v21, v13
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6678,8 +6567,7 @@ define void @v_shuffle_v2i64_v8i64__11_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v12
-; GFX942-NEXT: v_mov_b32_e32 v23, v13
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6734,8 +6622,7 @@ define void @v_shuffle_v2i64_v8i64__12_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v12
-; GFX942-NEXT: v_mov_b32_e32 v25, v13
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6790,8 +6677,7 @@ define void @v_shuffle_v2i64_v8i64__13_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v12
-; GFX942-NEXT: v_mov_b32_e32 v27, v13
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6846,8 +6732,7 @@ define void @v_shuffle_v2i64_v8i64__14_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v28, v12
-; GFX942-NEXT: v_mov_b32_e32 v29, v13
+; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v30, v[26:29], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6931,8 +6816,7 @@ define void @v_shuffle_v2i64_v8i64__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -6976,8 +6860,7 @@ define void @v_shuffle_v2i64_v8i64__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7021,8 +6904,7 @@ define void @v_shuffle_v2i64_v8i64__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7066,8 +6948,7 @@ define void @v_shuffle_v2i64_v8i64__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7111,8 +6992,7 @@ define void @v_shuffle_v2i64_v8i64__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7156,8 +7036,7 @@ define void @v_shuffle_v2i64_v8i64__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7240,8 +7119,7 @@ define void @v_shuffle_v2i64_v8i64__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7334,8 +7212,7 @@ define void @v_shuffle_v2i64_v8i64__9_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v20, v14
-; GFX942-NEXT: v_mov_b32_e32 v21, v15
+; GFX942-NEXT: v_mov_b64_e32 v[20:21], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[18:21], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7390,8 +7267,7 @@ define void @v_shuffle_v2i64_v8i64__10_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v22, v14
-; GFX942-NEXT: v_mov_b32_e32 v23, v15
+; GFX942-NEXT: v_mov_b64_e32 v[22:23], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7446,8 +7322,7 @@ define void @v_shuffle_v2i64_v8i64__11_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v24, v14
-; GFX942-NEXT: v_mov_b32_e32 v25, v15
+; GFX942-NEXT: v_mov_b64_e32 v[24:25], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[22:25], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7502,8 +7377,7 @@ define void @v_shuffle_v2i64_v8i64__12_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v26, v14
-; GFX942-NEXT: v_mov_b32_e32 v27, v15
+; GFX942-NEXT: v_mov_b64_e32 v[26:27], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7558,8 +7432,7 @@ define void @v_shuffle_v2i64_v8i64__13_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v28, v14
-; GFX942-NEXT: v_mov_b32_e32 v29, v15
+; GFX942-NEXT: v_mov_b64_e32 v[28:29], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[26:29], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7614,8 +7487,7 @@ define void @v_shuffle_v2i64_v8i64__14_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v30, v14
-; GFX942-NEXT: v_mov_b32_e32 v31, v15
+; GFX942-NEXT: v_mov_b64_e32 v[30:31], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7944,8 +7816,7 @@ define void @v_shuffle_v2i64_v8i64__7_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v14
-; GFX942-NEXT: v_mov_b32_e32 v1, v15
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8000,8 +7871,7 @@ define void @v_shuffle_v2i64_v8i64__9_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8046,8 +7916,7 @@ define void @v_shuffle_v2i64_v8i64__10_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8092,8 +7961,7 @@ define void @v_shuffle_v2i64_v8i64__11_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8138,8 +8006,7 @@ define void @v_shuffle_v2i64_v8i64__12_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v0
-; GFX942-NEXT: v_mov_b32_e32 v11, v1
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8184,8 +8051,7 @@ define void @v_shuffle_v2i64_v8i64__13_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v0
-; GFX942-NEXT: v_mov_b32_e32 v13, v1
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8230,8 +8096,7 @@ define void @v_shuffle_v2i64_v8i64__14_8(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v0
-; GFX942-NEXT: v_mov_b32_e32 v15, v1
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8326,8 +8191,7 @@ define void @v_shuffle_v2i64_v8i64__0_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8382,8 +8246,7 @@ define void @v_shuffle_v2i64_v8i64__1_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8438,8 +8301,7 @@ define void @v_shuffle_v2i64_v8i64__2_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8494,8 +8356,7 @@ define void @v_shuffle_v2i64_v8i64__3_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8550,8 +8411,7 @@ define void @v_shuffle_v2i64_v8i64__4_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8606,8 +8466,7 @@ define void @v_shuffle_v2i64_v8i64__5_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8662,8 +8521,7 @@ define void @v_shuffle_v2i64_v8i64__6_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v16
-; GFX942-NEXT: v_mov_b32_e32 v15, v17
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8718,8 +8576,7 @@ define void @v_shuffle_v2i64_v8i64__7_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v16
-; GFX942-NEXT: v_mov_b32_e32 v1, v17
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8804,8 +8661,7 @@ define void @v_shuffle_v2i64_v8i64__9_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8850,8 +8706,7 @@ define void @v_shuffle_v2i64_v8i64__10_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8896,8 +8751,7 @@ define void @v_shuffle_v2i64_v8i64__11_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8942,8 +8796,7 @@ define void @v_shuffle_v2i64_v8i64__12_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -8988,8 +8841,7 @@ define void @v_shuffle_v2i64_v8i64__13_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v2
-; GFX942-NEXT: v_mov_b32_e32 v13, v3
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9034,8 +8886,7 @@ define void @v_shuffle_v2i64_v8i64__14_9(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v2
-; GFX942-NEXT: v_mov_b32_e32 v15, v3
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9130,8 +8981,7 @@ define void @v_shuffle_v2i64_v8i64__0_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9186,8 +9036,7 @@ define void @v_shuffle_v2i64_v8i64__1_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9242,8 +9091,7 @@ define void @v_shuffle_v2i64_v8i64__2_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9298,8 +9146,7 @@ define void @v_shuffle_v2i64_v8i64__3_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9354,8 +9201,7 @@ define void @v_shuffle_v2i64_v8i64__4_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9410,8 +9256,7 @@ define void @v_shuffle_v2i64_v8i64__5_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v16
-; GFX942-NEXT: v_mov_b32_e32 v13, v17
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9466,8 +9311,7 @@ define void @v_shuffle_v2i64_v8i64__6_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v18
-; GFX942-NEXT: v_mov_b32_e32 v15, v19
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9522,8 +9366,7 @@ define void @v_shuffle_v2i64_v8i64__7_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9568,8 +9411,7 @@ define void @v_shuffle_v2i64_v8i64__8_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9654,8 +9496,7 @@ define void @v_shuffle_v2i64_v8i64__10_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9700,8 +9541,7 @@ define void @v_shuffle_v2i64_v8i64__11_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9746,8 +9586,7 @@ define void @v_shuffle_v2i64_v8i64__12_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9792,8 +9631,7 @@ define void @v_shuffle_v2i64_v8i64__13_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9838,8 +9676,7 @@ define void @v_shuffle_v2i64_v8i64__14_10(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v4
-; GFX942-NEXT: v_mov_b32_e32 v15, v5
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9934,8 +9771,7 @@ define void @v_shuffle_v2i64_v8i64__0_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -9990,8 +9826,7 @@ define void @v_shuffle_v2i64_v8i64__1_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10046,8 +9881,7 @@ define void @v_shuffle_v2i64_v8i64__2_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10102,8 +9936,7 @@ define void @v_shuffle_v2i64_v8i64__3_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10158,8 +9991,7 @@ define void @v_shuffle_v2i64_v8i64__4_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v16
-; GFX942-NEXT: v_mov_b32_e32 v11, v17
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10214,8 +10046,7 @@ define void @v_shuffle_v2i64_v8i64__5_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v18
-; GFX942-NEXT: v_mov_b32_e32 v13, v19
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10270,8 +10101,7 @@ define void @v_shuffle_v2i64_v8i64__6_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v20
-; GFX942-NEXT: v_mov_b32_e32 v15, v21
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10326,8 +10156,7 @@ define void @v_shuffle_v2i64_v8i64__7_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v16
-; GFX942-NEXT: v_mov_b32_e32 v5, v17
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10372,8 +10201,7 @@ define void @v_shuffle_v2i64_v8i64__8_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10418,8 +10246,7 @@ define void @v_shuffle_v2i64_v8i64__9_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10504,8 +10331,7 @@ define void @v_shuffle_v2i64_v8i64__11_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10550,8 +10376,7 @@ define void @v_shuffle_v2i64_v8i64__12_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10596,8 +10421,7 @@ define void @v_shuffle_v2i64_v8i64__13_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10642,8 +10466,7 @@ define void @v_shuffle_v2i64_v8i64__14_11(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10738,8 +10561,7 @@ define void @v_shuffle_v2i64_v8i64__0_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10794,8 +10616,7 @@ define void @v_shuffle_v2i64_v8i64__1_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10850,8 +10671,7 @@ define void @v_shuffle_v2i64_v8i64__2_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10906,8 +10726,7 @@ define void @v_shuffle_v2i64_v8i64__3_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v16
-; GFX942-NEXT: v_mov_b32_e32 v9, v17
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -10962,8 +10781,7 @@ define void @v_shuffle_v2i64_v8i64__4_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v18
-; GFX942-NEXT: v_mov_b32_e32 v11, v19
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11018,8 +10836,7 @@ define void @v_shuffle_v2i64_v8i64__5_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v20
-; GFX942-NEXT: v_mov_b32_e32 v13, v21
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11074,8 +10891,7 @@ define void @v_shuffle_v2i64_v8i64__6_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v22
-; GFX942-NEXT: v_mov_b32_e32 v15, v23
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11130,8 +10946,7 @@ define void @v_shuffle_v2i64_v8i64__7_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v16
-; GFX942-NEXT: v_mov_b32_e32 v7, v17
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11176,8 +10991,7 @@ define void @v_shuffle_v2i64_v8i64__8_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11222,8 +11036,7 @@ define void @v_shuffle_v2i64_v8i64__9_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11268,8 +11081,7 @@ define void @v_shuffle_v2i64_v8i64__10_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11354,8 +11166,7 @@ define void @v_shuffle_v2i64_v8i64__12_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v8
-; GFX942-NEXT: v_mov_b32_e32 v11, v9
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11400,8 +11211,7 @@ define void @v_shuffle_v2i64_v8i64__13_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v8
-; GFX942-NEXT: v_mov_b32_e32 v13, v9
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11446,8 +11256,7 @@ define void @v_shuffle_v2i64_v8i64__14_12(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v8
-; GFX942-NEXT: v_mov_b32_e32 v15, v9
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11542,8 +11351,7 @@ define void @v_shuffle_v2i64_v8i64__0_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11598,8 +11406,7 @@ define void @v_shuffle_v2i64_v8i64__1_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11654,8 +11461,7 @@ define void @v_shuffle_v2i64_v8i64__2_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v16
-; GFX942-NEXT: v_mov_b32_e32 v7, v17
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11710,8 +11516,7 @@ define void @v_shuffle_v2i64_v8i64__3_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v18
-; GFX942-NEXT: v_mov_b32_e32 v9, v19
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11766,8 +11571,7 @@ define void @v_shuffle_v2i64_v8i64__4_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v20
-; GFX942-NEXT: v_mov_b32_e32 v11, v21
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11822,8 +11626,7 @@ define void @v_shuffle_v2i64_v8i64__5_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v22
-; GFX942-NEXT: v_mov_b32_e32 v13, v23
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11878,8 +11681,7 @@ define void @v_shuffle_v2i64_v8i64__6_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v24
-; GFX942-NEXT: v_mov_b32_e32 v15, v25
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11934,8 +11736,7 @@ define void @v_shuffle_v2i64_v8i64__7_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v16
-; GFX942-NEXT: v_mov_b32_e32 v9, v17
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -11980,8 +11781,7 @@ define void @v_shuffle_v2i64_v8i64__8_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12026,8 +11826,7 @@ define void @v_shuffle_v2i64_v8i64__9_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12072,8 +11871,7 @@ define void @v_shuffle_v2i64_v8i64__10_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12118,8 +11916,7 @@ define void @v_shuffle_v2i64_v8i64__11_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v10
-; GFX942-NEXT: v_mov_b32_e32 v9, v11
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12204,8 +12001,7 @@ define void @v_shuffle_v2i64_v8i64__13_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v10
-; GFX942-NEXT: v_mov_b32_e32 v13, v11
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12250,8 +12046,7 @@ define void @v_shuffle_v2i64_v8i64__14_13(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v10
-; GFX942-NEXT: v_mov_b32_e32 v15, v11
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12346,8 +12141,7 @@ define void @v_shuffle_v2i64_v8i64__0_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12402,8 +12196,7 @@ define void @v_shuffle_v2i64_v8i64__1_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v16
-; GFX942-NEXT: v_mov_b32_e32 v5, v17
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12458,8 +12251,7 @@ define void @v_shuffle_v2i64_v8i64__2_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v18
-; GFX942-NEXT: v_mov_b32_e32 v7, v19
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12514,8 +12306,7 @@ define void @v_shuffle_v2i64_v8i64__3_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v20
-; GFX942-NEXT: v_mov_b32_e32 v9, v21
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12570,8 +12361,7 @@ define void @v_shuffle_v2i64_v8i64__4_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v22
-; GFX942-NEXT: v_mov_b32_e32 v11, v23
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12626,8 +12416,7 @@ define void @v_shuffle_v2i64_v8i64__5_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v24
-; GFX942-NEXT: v_mov_b32_e32 v13, v25
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12682,8 +12471,7 @@ define void @v_shuffle_v2i64_v8i64__6_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v26
-; GFX942-NEXT: v_mov_b32_e32 v15, v27
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12738,8 +12526,7 @@ define void @v_shuffle_v2i64_v8i64__7_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v16
-; GFX942-NEXT: v_mov_b32_e32 v11, v17
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12784,8 +12571,7 @@ define void @v_shuffle_v2i64_v8i64__8_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12830,8 +12616,7 @@ define void @v_shuffle_v2i64_v8i64__9_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v12
-; GFX942-NEXT: v_mov_b32_e32 v5, v13
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12876,8 +12661,7 @@ define void @v_shuffle_v2i64_v8i64__10_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12922,8 +12706,7 @@ define void @v_shuffle_v2i64_v8i64__11_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v12
-; GFX942-NEXT: v_mov_b32_e32 v9, v13
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -12968,8 +12751,7 @@ define void @v_shuffle_v2i64_v8i64__12_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v12
-; GFX942-NEXT: v_mov_b32_e32 v11, v13
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13054,8 +12836,7 @@ define void @v_shuffle_v2i64_v8i64__14_14(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v12
-; GFX942-NEXT: v_mov_b32_e32 v15, v13
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13150,8 +12931,7 @@ define void @v_shuffle_v2i64_v8i64__0_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:17]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v16
-; GFX942-NEXT: v_mov_b32_e32 v3, v17
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13206,8 +12986,7 @@ define void @v_shuffle_v2i64_v8i64__1_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v18
-; GFX942-NEXT: v_mov_b32_e32 v5, v19
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[18:19]
; GFX942-NEXT: global_store_dwordx4 v20, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13262,8 +13041,7 @@ define void @v_shuffle_v2i64_v8i64__2_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:21]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v20
-; GFX942-NEXT: v_mov_b32_e32 v7, v21
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[20:21]
; GFX942-NEXT: global_store_dwordx4 v22, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13318,8 +13096,7 @@ define void @v_shuffle_v2i64_v8i64__3_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v22
-; GFX942-NEXT: v_mov_b32_e32 v9, v23
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[22:23]
; GFX942-NEXT: global_store_dwordx4 v24, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13374,8 +13151,7 @@ define void @v_shuffle_v2i64_v8i64__4_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[10:25]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v24
-; GFX942-NEXT: v_mov_b32_e32 v11, v25
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[24:25]
; GFX942-NEXT: global_store_dwordx4 v26, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13430,8 +13206,7 @@ define void @v_shuffle_v2i64_v8i64__5_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v26
-; GFX942-NEXT: v_mov_b32_e32 v13, v27
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[26:27]
; GFX942-NEXT: global_store_dwordx4 v28, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13486,8 +13261,7 @@ define void @v_shuffle_v2i64_v8i64__6_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[14:29]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v28
-; GFX942-NEXT: v_mov_b32_e32 v15, v29
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[28:29]
; GFX942-NEXT: global_store_dwordx4 v30, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13542,8 +13316,7 @@ define void @v_shuffle_v2i64_v8i64__7_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v16
-; GFX942-NEXT: v_mov_b32_e32 v13, v17
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[16:17]
; GFX942-NEXT: global_store_dwordx4 v18, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13588,8 +13361,7 @@ define void @v_shuffle_v2i64_v8i64__8_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v14
-; GFX942-NEXT: v_mov_b32_e32 v3, v15
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13634,8 +13406,7 @@ define void @v_shuffle_v2i64_v8i64__9_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13680,8 +13451,7 @@ define void @v_shuffle_v2i64_v8i64__10_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v14
-; GFX942-NEXT: v_mov_b32_e32 v7, v15
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13726,8 +13496,7 @@ define void @v_shuffle_v2i64_v8i64__11_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v14
-; GFX942-NEXT: v_mov_b32_e32 v9, v15
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13772,8 +13541,7 @@ define void @v_shuffle_v2i64_v8i64__12_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v14
-; GFX942-NEXT: v_mov_b32_e32 v11, v15
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13818,8 +13586,7 @@ define void @v_shuffle_v2i64_v8i64__13_15(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v16, 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v14
-; GFX942-NEXT: v_mov_b32_e32 v13, v15
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -13957,8 +13724,7 @@ define void @s_shuffle_v2i64_v8i64__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14042,8 +13808,7 @@ define void @s_shuffle_v2i64_v8i64__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14131,8 +13896,7 @@ define void @s_shuffle_v2i64_v8i64__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14221,8 +13985,7 @@ define void @s_shuffle_v2i64_v8i64__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14280,8 +14043,7 @@ define void @s_shuffle_v2i64_v8i64__9_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14367,8 +14129,7 @@ define void @s_shuffle_v2i64_v8i64__11_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14458,8 +14219,7 @@ define void @s_shuffle_v2i64_v8i64__13_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14550,8 +14310,7 @@ define void @s_shuffle_v2i64_v8i64__15_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14610,10 +14369,8 @@ define void @s_shuffle_v2i64_v8i64__15_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14668,8 +14425,7 @@ define void @s_shuffle_v2i64_v8i64__15_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14750,10 +14506,8 @@ define void @s_shuffle_v2i64_v8i64__15_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14808,8 +14562,7 @@ define void @s_shuffle_v2i64_v8i64__15_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14894,10 +14647,8 @@ define void @s_shuffle_v2i64_v8i64__15_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s30
-; GFX942-NEXT: s_mov_b32 s9, s31
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -14984,8 +14735,7 @@ define void @s_shuffle_v2i64_v8i64__15_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15094,10 +14844,8 @@ define void @s_shuffle_v2i64_v8i64__15_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s30
-; GFX942-NEXT: s_mov_b32 s9, s31
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15212,8 +14960,7 @@ define void @s_shuffle_v2i64_v8i64__15_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s30
-; GFX942-NEXT: s_mov_b32 s13, s31
+; GFX942-NEXT: s_mov_b64 s[12:13], s[30:31]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15270,10 +15017,8 @@ define void @s_shuffle_v2i64_v8i64__15_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15286,18 +15031,43 @@ define void @s_shuffle_v2i64_v8i64__15_8() {
}
define void @s_shuffle_v2i64_v8i64__15_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s22
-; GFX9-NEXT: s_mov_b32 s9, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 9>
@@ -15342,10 +15112,8 @@ define void @s_shuffle_v2i64_v8i64__15_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15358,18 +15126,43 @@ define void @s_shuffle_v2i64_v8i64__15_10() {
}
define void @s_shuffle_v2i64_v8i64__15_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 11>
@@ -15378,20 +15171,48 @@ define void @s_shuffle_v2i64_v8i64__15_11() {
}
define void @s_shuffle_v2i64_v8i64__15_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__15_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__15_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 15, i32 12>
@@ -15436,8 +15257,7 @@ define void @s_shuffle_v2i64_v8i64__15_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15486,10 +15306,8 @@ define void @s_shuffle_v2i64_v8i64__15_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15538,8 +15356,7 @@ define void @s_shuffle_v2i64_v8i64__15_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15586,8 +15403,7 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15599,18 +15415,43 @@ define void @s_shuffle_v2i64_v8i64__u_0() {
}
define void @s_shuffle_v2i64_v8i64__0_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -15654,10 +15495,8 @@ define void @s_shuffle_v2i64_v8i64__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15669,18 +15508,43 @@ define void @s_shuffle_v2i64_v8i64__1_0() {
}
define void @s_shuffle_v2i64_v8i64__2_0() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -15724,10 +15588,8 @@ define void @s_shuffle_v2i64_v8i64__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15775,8 +15637,7 @@ define void @s_shuffle_v2i64_v8i64__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15824,10 +15685,8 @@ define void @s_shuffle_v2i64_v8i64__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15875,8 +15734,7 @@ define void @s_shuffle_v2i64_v8i64__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -15926,10 +15784,8 @@ define void @s_shuffle_v2i64_v8i64__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -15973,8 +15829,7 @@ define void @s_shuffle_v2i64_v8i64__8_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16032,10 +15887,8 @@ define void @s_shuffle_v2i64_v8i64__9_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16136,8 +15989,7 @@ define void @s_shuffle_v2i64_v8i64__10_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16196,10 +16048,8 @@ define void @s_shuffle_v2i64_v8i64__11_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16262,8 +16112,7 @@ define void @s_shuffle_v2i64_v8i64__12_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16328,10 +16177,8 @@ define void @s_shuffle_v2i64_v8i64__13_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16390,8 +16237,7 @@ define void @s_shuffle_v2i64_v8i64__14_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s18, s0
-; GFX942-NEXT: s_mov_b32 s19, s1
+; GFX942-NEXT: s_mov_b64 s[18:19], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
@@ -16486,18 +16332,43 @@ define void @s_shuffle_v2i64_v8i64__0_1() {
}
define void @s_shuffle_v2i64_v8i64__1_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16505,18 +16376,43 @@ define void @s_shuffle_v2i64_v8i64__1_1() {
}
define void @s_shuffle_v2i64_v8i64__2_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16524,18 +16420,43 @@ define void @s_shuffle_v2i64_v8i64__2_1() {
}
define void @s_shuffle_v2i64_v8i64__3_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16579,8 +16500,7 @@ define void @s_shuffle_v2i64_v8i64__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16592,18 +16512,43 @@ define void @s_shuffle_v2i64_v8i64__4_1() {
}
define void @s_shuffle_v2i64_v8i64__5_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16647,8 +16592,7 @@ define void @s_shuffle_v2i64_v8i64__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -16662,18 +16606,43 @@ define void @s_shuffle_v2i64_v8i64__6_1() {
}
define void @s_shuffle_v2i64_v8i64__7_1() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s22
-; GFX9-NEXT: s_mov_b32 s9, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -16763,8 +16732,7 @@ define void @s_shuffle_v2i64_v8i64__9_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16865,8 +16833,7 @@ define void @s_shuffle_v2i64_v8i64__10_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16921,8 +16888,7 @@ define void @s_shuffle_v2i64_v8i64__11_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -16985,8 +16951,7 @@ define void @s_shuffle_v2i64_v8i64__12_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17047,8 +17012,7 @@ define void @s_shuffle_v2i64_v8i64__13_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17107,8 +17071,7 @@ define void @s_shuffle_v2i64_v8i64__14_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s18, s2
-; GFX942-NEXT: s_mov_b32 s19, s3
+; GFX942-NEXT: s_mov_b64 s[18:19], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
@@ -17155,8 +17118,7 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17168,18 +17130,43 @@ define void @s_shuffle_v2i64_v8i64__u_2() {
}
define void @s_shuffle_v2i64_v8i64__0_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -17223,10 +17210,8 @@ define void @s_shuffle_v2i64_v8i64__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17238,18 +17223,43 @@ define void @s_shuffle_v2i64_v8i64__1_2() {
}
define void @s_shuffle_v2i64_v8i64__2_2() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -17293,10 +17303,8 @@ define void @s_shuffle_v2i64_v8i64__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17344,8 +17352,7 @@ define void @s_shuffle_v2i64_v8i64__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17393,10 +17400,8 @@ define void @s_shuffle_v2i64_v8i64__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17444,8 +17449,7 @@ define void @s_shuffle_v2i64_v8i64__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -17495,10 +17499,8 @@ define void @s_shuffle_v2i64_v8i64__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17542,8 +17544,7 @@ define void @s_shuffle_v2i64_v8i64__8_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17623,10 +17624,8 @@ define void @s_shuffle_v2i64_v8i64__9_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17708,8 +17707,7 @@ define void @s_shuffle_v2i64_v8i64__10_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17796,10 +17794,8 @@ define void @s_shuffle_v2i64_v8i64__11_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17858,8 +17854,7 @@ define void @s_shuffle_v2i64_v8i64__12_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -17940,10 +17935,8 @@ define void @s_shuffle_v2i64_v8i64__13_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18002,8 +17995,7 @@ define void @s_shuffle_v2i64_v8i64__14_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s22, s4
-; GFX942-NEXT: s_mov_b32 s23, s5
+; GFX942-NEXT: s_mov_b64 s[22:23], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
@@ -18058,37 +18050,87 @@ define void @s_shuffle_v2i64_v8i64__u_3() {
}
define void @s_shuffle_v2i64_v8i64__0_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 3>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__1_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__1_3() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18136,18 +18178,43 @@ define void @s_shuffle_v2i64_v8i64__2_3() {
}
define void @s_shuffle_v2i64_v8i64__3_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18191,8 +18258,7 @@ define void @s_shuffle_v2i64_v8i64__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18204,18 +18270,43 @@ define void @s_shuffle_v2i64_v8i64__4_3() {
}
define void @s_shuffle_v2i64_v8i64__5_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18259,8 +18350,7 @@ define void @s_shuffle_v2i64_v8i64__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -18274,18 +18364,43 @@ define void @s_shuffle_v2i64_v8i64__6_3() {
}
define void @s_shuffle_v2i64_v8i64__7_3() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18375,8 +18490,7 @@ define void @s_shuffle_v2i64_v8i64__9_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18458,8 +18572,7 @@ define void @s_shuffle_v2i64_v8i64__10_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18520,8 +18633,7 @@ define void @s_shuffle_v2i64_v8i64__11_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18580,8 +18692,7 @@ define void @s_shuffle_v2i64_v8i64__12_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18636,8 +18747,7 @@ define void @s_shuffle_v2i64_v8i64__13_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18696,8 +18806,7 @@ define void @s_shuffle_v2i64_v8i64__14_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s22, s6
-; GFX942-NEXT: s_mov_b32 s23, s7
+; GFX942-NEXT: s_mov_b64 s[22:23], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
@@ -18744,8 +18853,7 @@ define void @s_shuffle_v2i64_v8i64__u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -18757,18 +18865,43 @@ define void @s_shuffle_v2i64_v8i64__u_4() {
}
define void @s_shuffle_v2i64_v8i64__0_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18776,20 +18909,48 @@ define void @s_shuffle_v2i64_v8i64__0_4() {
}
define void @s_shuffle_v2i64_v8i64__1_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__1_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18797,18 +18958,43 @@ define void @s_shuffle_v2i64_v8i64__1_4() {
}
define void @s_shuffle_v2i64_v8i64__2_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18816,90 +19002,145 @@ define void @s_shuffle_v2i64_v8i64__2_4() {
}
define void @s_shuffle_v2i64_v8i64__3_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__3_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__4_4() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 3, i32 4>
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
ret void
}
-define void @s_shuffle_v2i64_v8i64__4_4() {
-; GFX900-LABEL: s_shuffle_v2i64_v8i64__4_4:
+define void @s_shuffle_v2i64_v8i64__5_4() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
-; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
-; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__4_4:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__5_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -18943,8 +19184,7 @@ define void @s_shuffle_v2i64_v8i64__6_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s8
-; GFX942-NEXT: s_mov_b32 s15, s9
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -18958,20 +19198,48 @@ define void @s_shuffle_v2i64_v8i64__6_4() {
}
define void @s_shuffle_v2i64_v8i64__7_4() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__7_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 7, i32 4>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19011,8 +19279,7 @@ define void @s_shuffle_v2i64_v8i64__8_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19096,10 +19363,8 @@ define void @s_shuffle_v2i64_v8i64__9_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19160,8 +19425,7 @@ define void @s_shuffle_v2i64_v8i64__10_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19246,10 +19510,8 @@ define void @s_shuffle_v2i64_v8i64__11_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19336,8 +19598,7 @@ define void @s_shuffle_v2i64_v8i64__12_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19422,10 +19683,8 @@ define void @s_shuffle_v2i64_v8i64__13_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19512,8 +19771,7 @@ define void @s_shuffle_v2i64_v8i64__14_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s26, s8
-; GFX942-NEXT: s_mov_b32 s27, s9
+; GFX942-NEXT: s_mov_b64 s[26:27], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
@@ -19572,18 +19830,43 @@ define void @s_shuffle_v2i64_v8i64__u_5() {
}
define void @s_shuffle_v2i64_v8i64__0_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19627,8 +19910,7 @@ define void @s_shuffle_v2i64_v8i64__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19640,18 +19922,43 @@ define void @s_shuffle_v2i64_v8i64__1_5() {
}
define void @s_shuffle_v2i64_v8i64__2_5() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -19695,8 +20002,7 @@ define void @s_shuffle_v2i64_v8i64__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19788,8 +20094,7 @@ define void @s_shuffle_v2i64_v8i64__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -19837,8 +20142,7 @@ define void @s_shuffle_v2i64_v8i64__6_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -19888,8 +20192,7 @@ define void @s_shuffle_v2i64_v8i64__7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20013,8 +20316,7 @@ define void @s_shuffle_v2i64_v8i64__9_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20069,8 +20371,7 @@ define void @s_shuffle_v2i64_v8i64__10_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20151,8 +20452,7 @@ define void @s_shuffle_v2i64_v8i64__11_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20233,8 +20533,7 @@ define void @s_shuffle_v2i64_v8i64__12_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20315,8 +20614,7 @@ define void @s_shuffle_v2i64_v8i64__13_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20397,8 +20695,7 @@ define void @s_shuffle_v2i64_v8i64__14_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s26, s10
-; GFX942-NEXT: s_mov_b32 s27, s11
+; GFX942-NEXT: s_mov_b64 s[26:27], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
@@ -20445,8 +20742,7 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20458,18 +20754,43 @@ define void @s_shuffle_v2i64_v8i64__u_6() {
}
define void @s_shuffle_v2i64_v8i64__0_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s20
-; GFX9-NEXT: s_mov_b32 s11, s21
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 6>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -20511,35 +20832,58 @@ define void @s_shuffle_v2i64_v8i64__1_6() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 6>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__2_6() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 1, i32 6>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__2_6() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 6>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -20583,10 +20927,8 @@ define void @s_shuffle_v2i64_v8i64__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20634,8 +20976,7 @@ define void @s_shuffle_v2i64_v8i64__4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20683,10 +21024,8 @@ define void @s_shuffle_v2i64_v8i64__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20734,8 +21073,7 @@ define void @s_shuffle_v2i64_v8i64__6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -20785,10 +21123,8 @@ define void @s_shuffle_v2i64_v8i64__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20832,8 +21168,7 @@ define void @s_shuffle_v2i64_v8i64__8_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -20941,10 +21276,8 @@ define void @s_shuffle_v2i64_v8i64__9_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21005,8 +21338,7 @@ define void @s_shuffle_v2i64_v8i64__10_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21115,10 +21447,8 @@ define void @s_shuffle_v2i64_v8i64__11_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21229,8 +21559,7 @@ define void @s_shuffle_v2i64_v8i64__12_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21339,10 +21668,8 @@ define void @s_shuffle_v2i64_v8i64__13_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21457,8 +21784,7 @@ define void @s_shuffle_v2i64_v8i64__14_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s30, s12
-; GFX942-NEXT: s_mov_b32 s31, s13
+; GFX942-NEXT: s_mov_b64 s[30:31], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX942-NEXT: ;;#ASMSTART
@@ -21524,18 +21850,43 @@ define void @s_shuffle_v2i64_v8i64__u_7() {
}
define void @s_shuffle_v2i64_v8i64__0_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__0_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s22
-; GFX9-NEXT: s_mov_b32 s11, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__0_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 0, i32 7>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -21579,8 +21930,7 @@ define void @s_shuffle_v2i64_v8i64__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21594,18 +21944,43 @@ define void @s_shuffle_v2i64_v8i64__1_7() {
}
define void @s_shuffle_v2i64_v8i64__2_7() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__2_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__2_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> poison, <2 x i32> <i32 2, i32 7>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
@@ -21649,8 +22024,7 @@ define void @s_shuffle_v2i64_v8i64__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21700,8 +22074,7 @@ define void @s_shuffle_v2i64_v8i64__4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -21749,8 +22122,7 @@ define void @s_shuffle_v2i64_v8i64__5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -21845,8 +22217,7 @@ define void @s_shuffle_v2i64_v8i64__7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22001,8 +22372,7 @@ define void @s_shuffle_v2i64_v8i64__9_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s18
-; GFX942-NEXT: s_mov_b32 s13, s19
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22065,8 +22435,7 @@ define void @s_shuffle_v2i64_v8i64__10_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22175,8 +22544,7 @@ define void @s_shuffle_v2i64_v8i64__11_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s22
-; GFX942-NEXT: s_mov_b32 s13, s23
+; GFX942-NEXT: s_mov_b64 s[12:13], s[22:23]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22289,8 +22657,7 @@ define void @s_shuffle_v2i64_v8i64__12_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22399,8 +22766,7 @@ define void @s_shuffle_v2i64_v8i64__13_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s26
-; GFX942-NEXT: s_mov_b32 s13, s27
+; GFX942-NEXT: s_mov_b64 s[12:13], s[26:27]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -22517,8 +22883,7 @@ define void @s_shuffle_v2i64_v8i64__14_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s30, s14
-; GFX942-NEXT: s_mov_b32 s31, s15
+; GFX942-NEXT: s_mov_b64 s[30:31], s[14:15]
; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX942-NEXT: ;;#ASMSTART
@@ -22625,8 +22990,7 @@ define void @s_shuffle_v2i64_v8i64__1_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22710,8 +23074,7 @@ define void @s_shuffle_v2i64_v8i64__3_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22799,8 +23162,7 @@ define void @s_shuffle_v2i64_v8i64__5_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22889,8 +23251,7 @@ define void @s_shuffle_v2i64_v8i64__7_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22952,10 +23313,8 @@ define void @s_shuffle_v2i64_v8i64__9_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -22968,18 +23327,43 @@ define void @s_shuffle_v2i64_v8i64__9_8() {
}
define void @s_shuffle_v2i64_v8i64__10_8() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_8:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_8:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 8>
@@ -23024,10 +23408,8 @@ define void @s_shuffle_v2i64_v8i64__11_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23076,8 +23458,7 @@ define void @s_shuffle_v2i64_v8i64__12_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23126,10 +23507,8 @@ define void @s_shuffle_v2i64_v8i64__13_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23178,8 +23557,7 @@ define void @s_shuffle_v2i64_v8i64__14_8() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -23277,8 +23655,7 @@ define void @s_shuffle_v2i64_v8i64__0_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23333,8 +23710,7 @@ define void @s_shuffle_v2i64_v8i64__1_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23389,8 +23765,7 @@ define void @s_shuffle_v2i64_v8i64__2_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23491,8 +23866,7 @@ define void @s_shuffle_v2i64_v8i64__3_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23573,8 +23947,7 @@ define void @s_shuffle_v2i64_v8i64__4_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23656,8 +24029,7 @@ define void @s_shuffle_v2i64_v8i64__5_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23772,8 +24144,7 @@ define void @s_shuffle_v2i64_v8i64__6_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s18
-; GFX942-NEXT: s_mov_b32 s15, s19
+; GFX942-NEXT: s_mov_b64 s[14:15], s[18:19]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -23836,8 +24207,7 @@ define void @s_shuffle_v2i64_v8i64__7_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s26
-; GFX942-NEXT: s_mov_b32 s9, s27
+; GFX942-NEXT: s_mov_b64 s[8:9], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -23891,18 +24261,43 @@ define void @s_shuffle_v2i64_v8i64__8_9() {
}
define void @s_shuffle_v2i64_v8i64__9_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 9>
@@ -23911,18 +24306,43 @@ define void @s_shuffle_v2i64_v8i64__9_9() {
}
define void @s_shuffle_v2i64_v8i64__10_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 9>
@@ -23931,18 +24351,43 @@ define void @s_shuffle_v2i64_v8i64__10_9() {
}
define void @s_shuffle_v2i64_v8i64__11_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 9>
@@ -23987,8 +24432,7 @@ define void @s_shuffle_v2i64_v8i64__12_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24001,18 +24445,43 @@ define void @s_shuffle_v2i64_v8i64__12_9() {
}
define void @s_shuffle_v2i64_v8i64__13_9() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_9:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s18
-; GFX9-NEXT: s_mov_b32 s9, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_9:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 9>
@@ -24057,8 +24526,7 @@ define void @s_shuffle_v2i64_v8i64__14_9() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -24105,8 +24573,7 @@ define void @s_shuffle_v2i64_v8i64__u_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24161,8 +24628,7 @@ define void @s_shuffle_v2i64_v8i64__0_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24221,10 +24687,8 @@ define void @s_shuffle_v2i64_v8i64__1_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24279,8 +24743,7 @@ define void @s_shuffle_v2i64_v8i64__2_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24339,10 +24802,8 @@ define void @s_shuffle_v2i64_v8i64__3_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24423,8 +24884,7 @@ define void @s_shuffle_v2i64_v8i64__4_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24501,13 +24961,11 @@ define void @s_shuffle_v2i64_v8i64__5_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24616,8 +25074,7 @@ define void @s_shuffle_v2i64_v8i64__6_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s20
-; GFX942-NEXT: s_mov_b32 s15, s21
+; GFX942-NEXT: s_mov_b64 s[14:15], s[20:21]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -24734,10 +25191,8 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24756,18 +25211,43 @@ define void @s_shuffle_v2i64_v8i64__7_10() {
}
define void @s_shuffle_v2i64_v8i64__8_10() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_10:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 10>
@@ -24812,10 +25292,8 @@ define void @s_shuffle_v2i64_v8i64__9_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24828,18 +25306,43 @@ define void @s_shuffle_v2i64_v8i64__9_10() {
}
define void @s_shuffle_v2i64_v8i64__10_10() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_10:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_10:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 10>
@@ -24884,10 +25387,8 @@ define void @s_shuffle_v2i64_v8i64__11_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24936,8 +25437,7 @@ define void @s_shuffle_v2i64_v8i64__12_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -24986,10 +25486,8 @@ define void @s_shuffle_v2i64_v8i64__13_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25038,8 +25536,7 @@ define void @s_shuffle_v2i64_v8i64__14_10() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25137,8 +25634,7 @@ define void @s_shuffle_v2i64_v8i64__0_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25239,8 +25735,7 @@ define void @s_shuffle_v2i64_v8i64__1_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25295,8 +25790,7 @@ define void @s_shuffle_v2i64_v8i64__2_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25378,8 +25872,7 @@ define void @s_shuffle_v2i64_v8i64__3_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25466,8 +25959,7 @@ define void @s_shuffle_v2i64_v8i64__4_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25522,8 +26014,7 @@ define void @s_shuffle_v2i64_v8i64__5_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25632,8 +26123,7 @@ define void @s_shuffle_v2i64_v8i64__6_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s22
-; GFX942-NEXT: s_mov_b32 s15, s23
+; GFX942-NEXT: s_mov_b64 s[14:15], s[22:23]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25696,8 +26186,7 @@ define void @s_shuffle_v2i64_v8i64__7_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25710,18 +26199,43 @@ define void @s_shuffle_v2i64_v8i64__7_11() {
}
define void @s_shuffle_v2i64_v8i64__8_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 11>
@@ -25730,18 +26244,43 @@ define void @s_shuffle_v2i64_v8i64__8_11() {
}
define void @s_shuffle_v2i64_v8i64__9_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 11>
@@ -25791,18 +26330,43 @@ define void @s_shuffle_v2i64_v8i64__10_11() {
}
define void @s_shuffle_v2i64_v8i64__11_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 11>
@@ -25847,8 +26411,7 @@ define void @s_shuffle_v2i64_v8i64__12_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -25861,18 +26424,43 @@ define void @s_shuffle_v2i64_v8i64__12_11() {
}
define void @s_shuffle_v2i64_v8i64__13_11() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_11:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_11:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 11>
@@ -25917,8 +26505,7 @@ define void @s_shuffle_v2i64_v8i64__14_11() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -25965,8 +26552,7 @@ define void @s_shuffle_v2i64_v8i64__u_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26021,8 +26607,7 @@ define void @s_shuffle_v2i64_v8i64__0_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26081,10 +26666,8 @@ define void @s_shuffle_v2i64_v8i64__1_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26139,8 +26722,7 @@ define void @s_shuffle_v2i64_v8i64__2_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26199,10 +26781,8 @@ define void @s_shuffle_v2i64_v8i64__3_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26283,8 +26863,7 @@ define void @s_shuffle_v2i64_v8i64__4_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26361,13 +26940,11 @@ define void @s_shuffle_v2i64_v8i64__5_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26476,8 +27053,7 @@ define void @s_shuffle_v2i64_v8i64__6_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s24
-; GFX942-NEXT: s_mov_b32 s15, s25
+; GFX942-NEXT: s_mov_b64 s[14:15], s[24:25]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -26539,159 +27115,263 @@ define void @s_shuffle_v2i64_v8i64__7_12() {
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
+; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
+; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
+; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
+; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
+; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
+; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
+; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
+; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[36:51]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s44
+; GFX90A-NEXT: s_mov_b32 s11, s45
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
+; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
+; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
+; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
+; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
+; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
+; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
+; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
+; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
+; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
+; GFX90A-NEXT: s_waitcnt vmcnt(0)
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: v_writelane_b32 v0, s30, 0
+; GFX942-NEXT: v_writelane_b32 v0, s31, 1
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s31, v0, 1
+; GFX942-NEXT: v_readlane_b32 s30, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 7, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__8_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__9_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__10_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 12>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__11_12() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__11_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__11_12:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX90A-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
-; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
-; GFX90A-NEXT: v_writelane_b32 v0, s36, 0
-; GFX90A-NEXT: v_writelane_b32 v0, s37, 1
-; GFX90A-NEXT: v_writelane_b32 v0, s38, 2
-; GFX90A-NEXT: v_writelane_b32 v0, s39, 3
-; GFX90A-NEXT: v_writelane_b32 v0, s48, 4
-; GFX90A-NEXT: v_writelane_b32 v0, s49, 5
-; GFX90A-NEXT: v_writelane_b32 v0, s50, 6
-; GFX90A-NEXT: v_writelane_b32 v0, s51, 7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[36:51]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s44
-; GFX90A-NEXT: s_mov_b32 s11, s45
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_readlane_b32 s51, v0, 7
-; GFX90A-NEXT: v_readlane_b32 s50, v0, 6
-; GFX90A-NEXT: v_readlane_b32 s49, v0, 5
-; GFX90A-NEXT: v_readlane_b32 s48, v0, 4
-; GFX90A-NEXT: v_readlane_b32 s39, v0, 3
-; GFX90A-NEXT: v_readlane_b32 s38, v0, 2
-; GFX90A-NEXT: v_readlane_b32 s37, v0, 1
-; GFX90A-NEXT: v_readlane_b32 s36, v0, 0
-; GFX90A-NEXT: s_xor_saveexec_b64 s[4:5], -1
-; GFX90A-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
-; GFX90A-NEXT: s_mov_b64 exec, s[4:5]
-; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_12:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__11_12:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: v_writelane_b32 v0, s30, 0
-; GFX942-NEXT: v_writelane_b32 v0, s31, 1
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
-; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s31, v0, 1
-; GFX942-NEXT: v_readlane_b32 s30, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 7, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__8_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__9_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__9_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__10_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 12>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__11_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__11_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 11, i32 12>
@@ -26736,8 +27416,7 @@ define void @s_shuffle_v2i64_v8i64__12_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s8
-; GFX942-NEXT: s_mov_b32 s11, s9
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26750,20 +27429,48 @@ define void @s_shuffle_v2i64_v8i64__12_12() {
}
define void @s_shuffle_v2i64_v8i64__13_12() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__13_12:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__13_12:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 13, i32 12>
@@ -26808,8 +27515,7 @@ define void @s_shuffle_v2i64_v8i64__14_12() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s8
-; GFX942-NEXT: s_mov_b32 s15, s9
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -26911,8 +27617,7 @@ define void @s_shuffle_v2i64_v8i64__0_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -26975,8 +27680,7 @@ define void @s_shuffle_v2i64_v8i64__1_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27037,8 +27741,7 @@ define void @s_shuffle_v2i64_v8i64__2_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27097,8 +27800,7 @@ define void @s_shuffle_v2i64_v8i64__3_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27179,8 +27881,7 @@ define void @s_shuffle_v2i64_v8i64__4_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27261,8 +27962,7 @@ define void @s_shuffle_v2i64_v8i64__5_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27371,8 +28071,7 @@ define void @s_shuffle_v2i64_v8i64__6_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s26
-; GFX942-NEXT: s_mov_b32 s15, s27
+; GFX942-NEXT: s_mov_b64 s[14:15], s[26:27]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -27485,8 +28184,7 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27499,18 +28197,43 @@ define void @s_shuffle_v2i64_v8i64__7_13() {
}
define void @s_shuffle_v2i64_v8i64__8_13() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_13:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 13>
@@ -27525,62 +28248,86 @@ define void @s_shuffle_v2i64_v8i64__9_13() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX900-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <8 x i64> asm "; def $0", "=s"()
+ %vec1 = call <8 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 13>
+ call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v2i64_v8i64__10_13() {
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_13:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_13:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b64 s[8:9], s[12:13]
-; GFX90A-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v2i64_v8i64__9_13:
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_13:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <8 x i64> asm "; def $0", "=s"()
- %vec1 = call <8 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 9, i32 13>
- call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v2i64_v8i64__10_13() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_13:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 13>
@@ -27625,8 +28372,7 @@ define void @s_shuffle_v2i64_v8i64__11_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27720,8 +28466,7 @@ define void @s_shuffle_v2i64_v8i64__13_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27770,8 +28515,7 @@ define void @s_shuffle_v2i64_v8i64__14_13() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -27818,8 +28562,7 @@ define void @s_shuffle_v2i64_v8i64__u_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27874,8 +28617,7 @@ define void @s_shuffle_v2i64_v8i64__0_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27934,10 +28676,8 @@ define void @s_shuffle_v2i64_v8i64__1_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -27992,8 +28732,7 @@ define void @s_shuffle_v2i64_v8i64__2_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28052,10 +28791,8 @@ define void @s_shuffle_v2i64_v8i64__3_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28136,8 +28873,7 @@ define void @s_shuffle_v2i64_v8i64__4_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28214,13 +28950,11 @@ define void @s_shuffle_v2i64_v8i64__5_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s24
-; GFX942-NEXT: s_mov_b32 s11, s25
+; GFX942-NEXT: s_mov_b64 s[10:11], s[24:25]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28329,8 +29063,7 @@ define void @s_shuffle_v2i64_v8i64__6_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s28
-; GFX942-NEXT: s_mov_b32 s15, s29
+; GFX942-NEXT: s_mov_b64 s[14:15], s[28:29]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -28447,10 +29180,8 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s28
-; GFX942-NEXT: s_mov_b32 s11, s29
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[28:29]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28469,18 +29200,43 @@ define void @s_shuffle_v2i64_v8i64__7_14() {
}
define void @s_shuffle_v2i64_v8i64__8_14() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s20
-; GFX9-NEXT: s_mov_b32 s11, s21
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_14:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 14>
@@ -28525,10 +29281,8 @@ define void @s_shuffle_v2i64_v8i64__9_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28541,18 +29295,43 @@ define void @s_shuffle_v2i64_v8i64__9_14() {
}
define void @s_shuffle_v2i64_v8i64__10_14() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_14:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s16
-; GFX9-NEXT: s_mov_b32 s11, s17
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_14:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 14>
@@ -28597,10 +29376,8 @@ define void @s_shuffle_v2i64_v8i64__11_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28649,8 +29426,7 @@ define void @s_shuffle_v2i64_v8i64__12_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28699,10 +29475,8 @@ define void @s_shuffle_v2i64_v8i64__13_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28751,8 +29525,7 @@ define void @s_shuffle_v2i64_v8i64__14_14() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -28855,8 +29628,7 @@ define void @s_shuffle_v2i64_v8i64__0_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -28908,20 +29680,29 @@ define void @s_shuffle_v2i64_v8i64__1_15() {
; GFX942-LABEL: s_shuffle_v2i64_v8i64__1_15:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: v_writelane_b32 v0, s30, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: v_writelane_b32 v0, s31, 1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s16, s2
-; GFX942-NEXT: s_mov_b32 s17, s3
-; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: v_readlane_b32 s31, v0, 1
+; GFX942-NEXT: v_readlane_b32 s30, v0, 0
+; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
+; GFX942-NEXT: s_mov_b64 exec, s[0:1]
+; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
@@ -28973,8 +29754,7 @@ define void @s_shuffle_v2i64_v8i64__2_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29027,16 +29807,15 @@ define void @s_shuffle_v2i64_v8i64__3_15() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s20, s6
-; GFX942-NEXT: s_mov_b32 s21, s7
-; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29117,8 +29896,7 @@ define void @s_shuffle_v2i64_v8i64__4_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:27]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s26
-; GFX942-NEXT: s_mov_b32 s11, s27
+; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29193,16 +29971,15 @@ define void @s_shuffle_v2i64_v8i64__5_15() {
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:15]
+; GFX942-NEXT: ; def s[8:23]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:27]
+; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s24, s10
-; GFX942-NEXT: s_mov_b32 s25, s11
-; GFX942-NEXT: s_mov_b64 s[8:9], s[24:25]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[26:27]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29311,8 +30088,7 @@ define void @s_shuffle_v2i64_v8i64__6_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:31]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s30
-; GFX942-NEXT: s_mov_b32 s15, s31
+; GFX942-NEXT: s_mov_b64 s[14:15], s[30:31]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29418,30 +30194,19 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
; GFX942-LABEL: s_shuffle_v2i64_v8i64__7_15:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: v_writelane_b32 v0, s30, 0
-; GFX942-NEXT: v_writelane_b32 v0, s31, 1
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:31]
+; GFX942-NEXT: ; def s[4:19]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s28, s14
-; GFX942-NEXT: s_mov_b32 s29, s15
-; GFX942-NEXT: s_mov_b64 s[8:9], s[28:29]
-; GFX942-NEXT: s_mov_b64 s[10:11], s[30:31]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_readlane_b32 s31, v0, 1
-; GFX942-NEXT: v_readlane_b32 s30, v0, 0
-; GFX942-NEXT: s_xor_saveexec_b64 s[0:1], -1
-; GFX942-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
-; GFX942-NEXT: s_mov_b64 exec, s[0:1]
-; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
@@ -29451,18 +30216,43 @@ define void @s_shuffle_v2i64_v8i64__7_15() {
}
define void @s_shuffle_v2i64_v8i64__8_15() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__8_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:23]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s22
-; GFX9-NEXT: s_mov_b32 s11, s23
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__8_15:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 8, i32 15>
@@ -29507,8 +30297,7 @@ define void @s_shuffle_v2i64_v8i64__9_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29523,18 +30312,43 @@ define void @s_shuffle_v2i64_v8i64__9_15() {
}
define void @s_shuffle_v2i64_v8i64__10_15() {
-; GFX9-LABEL: s_shuffle_v2i64_v8i64__10_15:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:19]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s18
-; GFX9-NEXT: s_mov_b32 s11, s19
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2i64_v8i64__10_15:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <8 x i64> asm "; def $0", "=s"()
%vec1 = call <8 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <8 x i64> %vec0, <8 x i64> %vec1, <2 x i32> <i32 10, i32 15>
@@ -29579,8 +30393,7 @@ define void @s_shuffle_v2i64_v8i64__11_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
@@ -29631,8 +30444,7 @@ define void @s_shuffle_v2i64_v8i64__12_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -29681,8 +30493,7 @@ define void @s_shuffle_v2i64_v8i64__13_15() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
index 7f8f2dbbb09a1..54e700625d72c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v2p0.ll
@@ -88,8 +88,7 @@ define void @v_shuffle_v2p0_v2p0__1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -144,8 +143,7 @@ define void @v_shuffle_v2p0_v2p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -204,10 +202,8 @@ define void @v_shuffle_v2p0_v2p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -262,8 +258,7 @@ define void @v_shuffle_v2p0_v2p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -309,8 +304,7 @@ define void @v_shuffle_v2p0_v2p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -355,8 +349,7 @@ define void @v_shuffle_v2p0_v2p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -401,8 +394,7 @@ define void @v_shuffle_v2p0_v2p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -446,8 +438,7 @@ define void @v_shuffle_v2p0_v2p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -492,8 +483,7 @@ define void @v_shuffle_v2p0_v2p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -537,8 +527,7 @@ define void @v_shuffle_v2p0_v2p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -660,8 +649,7 @@ define void @v_shuffle_v2p0_v2p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -794,8 +782,7 @@ define void @v_shuffle_v2p0_v2p0__1_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v4, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v2
-; GFX942-NEXT: v_mov_b32_e32 v1, v3
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -900,8 +887,7 @@ define void @v_shuffle_v2p0_v2p0__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -956,8 +942,7 @@ define void @v_shuffle_v2p0_v2p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1095,8 +1080,7 @@ define void @s_shuffle_v2p0_v2p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1154,8 +1138,7 @@ define void @s_shuffle_v2p0_v2p0__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1213,10 +1196,8 @@ define void @s_shuffle_v2p0_v2p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1270,8 +1251,7 @@ define void @s_shuffle_v2p0_v2p0__3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1320,10 +1300,8 @@ define void @s_shuffle_v2p0_v2p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1336,18 +1314,43 @@ define void @s_shuffle_v2p0_v2p0__3_2() {
}
define void @s_shuffle_v2p0_v2p0__3_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <2 x i32> <i32 3, i32 3>
@@ -1388,8 +1391,7 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1401,18 +1403,43 @@ define void @s_shuffle_v2p0_v2p0__u_0() {
}
define void @s_shuffle_v2p0_v2p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -1456,10 +1483,8 @@ define void @s_shuffle_v2p0_v2p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1503,8 +1528,7 @@ define void @s_shuffle_v2p0_v2p0__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1596,18 +1620,43 @@ define void @s_shuffle_v2p0_v2p0__0_1() {
}
define void @s_shuffle_v2p0_v2p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v2p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v2p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -1741,8 +1790,7 @@ define void @s_shuffle_v2p0_v2p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1850,8 +1898,7 @@ define void @s_shuffle_v2p0_v2p0__0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -1905,8 +1952,7 @@ define void @s_shuffle_v2p0_v2p0__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
index 27a6cf11c4cb1..9c770bf1c77cc 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v3p0.ll
@@ -127,8 +127,7 @@ define void @v_shuffle_v2p0_v3p0__2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -223,8 +222,7 @@ define void @v_shuffle_v2p0_v3p0__5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -283,10 +281,8 @@ define void @v_shuffle_v2p0_v3p0__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -341,8 +337,7 @@ define void @v_shuffle_v2p0_v3p0__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v8
-; GFX942-NEXT: v_mov_b32_e32 v1, v9
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -397,8 +392,7 @@ define void @v_shuffle_v2p0_v3p0__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v10
-; GFX942-NEXT: v_mov_b32_e32 v3, v11
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -447,10 +441,8 @@ define void @v_shuffle_v2p0_v3p0__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -495,8 +487,7 @@ define void @v_shuffle_v2p0_v3p0__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -541,8 +532,7 @@ define void @v_shuffle_v2p0_v3p0__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -587,8 +577,7 @@ define void @v_shuffle_v2p0_v3p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -632,8 +621,7 @@ define void @v_shuffle_v2p0_v3p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -677,8 +665,7 @@ define void @v_shuffle_v2p0_v3p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -726,10 +713,8 @@ define void @v_shuffle_v2p0_v3p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -773,8 +758,7 @@ define void @v_shuffle_v2p0_v3p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -828,8 +812,7 @@ define void @v_shuffle_v2p0_v3p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,8 +935,7 @@ define void @v_shuffle_v2p0_v3p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -997,8 +979,7 @@ define void @v_shuffle_v2p0_v3p0__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1091,8 +1072,7 @@ define void @v_shuffle_v2p0_v3p0__4_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1176,8 +1156,7 @@ define void @v_shuffle_v2p0_v3p0__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1260,8 +1239,7 @@ define void @v_shuffle_v2p0_v3p0__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1354,8 +1332,7 @@ define void @v_shuffle_v2p0_v3p0__4_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1489,8 +1466,7 @@ define void @v_shuffle_v2p0_v3p0__2_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v4
-; GFX942-NEXT: v_mov_b32_e32 v1, v5
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1545,8 +1521,7 @@ define void @v_shuffle_v2p0_v3p0__4_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1641,8 +1616,7 @@ define void @v_shuffle_v2p0_v3p0__0_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1697,8 +1671,7 @@ define void @v_shuffle_v2p0_v3p0__1_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1753,8 +1726,7 @@ define void @v_shuffle_v2p0_v3p0__2_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1839,8 +1811,7 @@ define void @v_shuffle_v2p0_v3p0__4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1935,8 +1906,7 @@ define void @v_shuffle_v2p0_v3p0__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1991,8 +1961,7 @@ define void @v_shuffle_v2p0_v3p0__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2047,8 +2016,7 @@ define void @v_shuffle_v2p0_v3p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v4
-; GFX942-NEXT: v_mov_b32_e32 v9, v5
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2093,8 +2061,7 @@ define void @v_shuffle_v2p0_v3p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v6, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2232,8 +2199,7 @@ define void @s_shuffle_v2p0_v3p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2273,8 +2239,7 @@ define void @s_shuffle_v2p0_v3p0__2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2332,8 +2297,7 @@ define void @s_shuffle_v2p0_v3p0__4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2374,8 +2338,7 @@ define void @s_shuffle_v2p0_v3p0__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2430,11 +2393,11 @@ define void @s_shuffle_v2p0_v3p0__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2484,8 +2447,7 @@ define void @s_shuffle_v2p0_v3p0__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2539,10 +2501,8 @@ define void @s_shuffle_v2p0_v3p0__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2587,10 +2547,8 @@ define void @s_shuffle_v2p0_v3p0__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2603,18 +2561,43 @@ define void @s_shuffle_v2p0_v3p0__5_3() {
}
define void @s_shuffle_v2p0_v3p0__5_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__5_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 5, i32 4>
@@ -2659,10 +2642,8 @@ define void @s_shuffle_v2p0_v3p0__5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,8 +2688,7 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2720,18 +2700,43 @@ define void @s_shuffle_v2p0_v3p0__u_0() {
}
define void @s_shuffle_v2p0_v3p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -2775,10 +2780,8 @@ define void @s_shuffle_v2p0_v3p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2822,10 +2825,8 @@ define void @s_shuffle_v2p0_v3p0__2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2869,8 +2870,7 @@ define void @s_shuffle_v2p0_v3p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -2924,13 +2924,11 @@ define void @s_shuffle_v2p0_v3p0__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3023,18 +3021,43 @@ define void @s_shuffle_v2p0_v3p0__0_1() {
}
define void @s_shuffle_v2p0_v3p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3042,18 +3065,43 @@ define void @s_shuffle_v2p0_v3p0__1_1() {
}
define void @s_shuffle_v2p0_v3p0__2_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3142,8 +3190,7 @@ define void @s_shuffle_v2p0_v3p0__4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3188,8 +3235,7 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3201,18 +3247,43 @@ define void @s_shuffle_v2p0_v3p0__u_2() {
}
define void @s_shuffle_v2p0_v3p0__0_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -3256,10 +3327,8 @@ define void @s_shuffle_v2p0_v3p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3307,10 +3376,8 @@ define void @s_shuffle_v2p0_v3p0__2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3354,8 +3421,7 @@ define void @s_shuffle_v2p0_v3p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3412,10 +3478,8 @@ define void @s_shuffle_v2p0_v3p0__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3514,8 +3578,7 @@ define void @s_shuffle_v2p0_v3p0__1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3555,8 +3618,7 @@ define void @s_shuffle_v2p0_v3p0__2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3618,10 +3680,8 @@ define void @s_shuffle_v2p0_v3p0__4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3716,8 +3776,7 @@ define void @s_shuffle_v2p0_v3p0__0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3771,8 +3830,7 @@ define void @s_shuffle_v2p0_v3p0__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3826,8 +3884,7 @@ define void @s_shuffle_v2p0_v3p0__2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3881,18 +3938,43 @@ define void @s_shuffle_v2p0_v3p0__3_4() {
}
define void @s_shuffle_v2p0_v3p0__4_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 4, i32 4>
@@ -3933,8 +4015,7 @@ define void @s_shuffle_v2p0_v3p0__u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3988,8 +4069,7 @@ define void @s_shuffle_v2p0_v3p0__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4047,10 +4127,8 @@ define void @s_shuffle_v2p0_v3p0__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4104,10 +4182,8 @@ define void @s_shuffle_v2p0_v3p0__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4120,18 +4196,43 @@ define void @s_shuffle_v2p0_v3p0__2_5() {
}
define void @s_shuffle_v2p0_v3p0__3_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v3p0__3_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v3p0__3_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <2 x i32> <i32 3, i32 5>
@@ -4176,10 +4277,8 @@ define void @s_shuffle_v2p0_v3p0__4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
index ae31524ebaa7f..47634638d7674 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v2p0.v4p0.ll
@@ -166,8 +166,7 @@ define void @v_shuffle_v2p0_v4p0__3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -302,8 +301,7 @@ define void @v_shuffle_v2p0_v4p0__7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -362,10 +360,8 @@ define void @v_shuffle_v2p0_v4p0__7_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -420,8 +416,7 @@ define void @v_shuffle_v2p0_v4p0__7_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v10
-; GFX942-NEXT: v_mov_b32_e32 v1, v11
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -476,8 +471,7 @@ define void @v_shuffle_v2p0_v4p0__7_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v12
-; GFX942-NEXT: v_mov_b32_e32 v3, v13
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -532,8 +526,7 @@ define void @v_shuffle_v2p0_v4p0__7_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v14
-; GFX942-NEXT: v_mov_b32_e32 v5, v15
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[14:15]
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -582,10 +575,8 @@ define void @v_shuffle_v2p0_v4p0__7_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -630,8 +621,7 @@ define void @v_shuffle_v2p0_v4p0__7_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -676,8 +666,7 @@ define void @v_shuffle_v2p0_v4p0__7_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -722,8 +711,7 @@ define void @v_shuffle_v2p0_v4p0__7_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -768,8 +756,7 @@ define void @v_shuffle_v2p0_v4p0__u_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -813,8 +800,7 @@ define void @v_shuffle_v2p0_v4p0__0_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -858,8 +844,7 @@ define void @v_shuffle_v2p0_v4p0__1_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -903,8 +888,7 @@ define void @v_shuffle_v2p0_v4p0__2_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -952,10 +936,8 @@ define void @v_shuffle_v2p0_v4p0__3_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -999,8 +981,7 @@ define void @v_shuffle_v2p0_v4p0__4_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v1
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1054,8 +1035,7 @@ define void @v_shuffle_v2p0_v4p0__5_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1110,8 +1090,7 @@ define void @v_shuffle_v2p0_v4p0__6_0(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v0
-; GFX942-NEXT: v_mov_b32_e32 v9, v1
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v10, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1234,8 +1213,7 @@ define void @v_shuffle_v2p0_v4p0__1_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1279,8 +1257,7 @@ define void @v_shuffle_v2p0_v4p0__2_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1324,8 +1301,7 @@ define void @v_shuffle_v2p0_v4p0__3_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1418,8 +1394,7 @@ define void @v_shuffle_v2p0_v4p0__5_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v2
-; GFX942-NEXT: v_mov_b32_e32 v9, v3
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[6:9], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1474,8 +1449,7 @@ define void @v_shuffle_v2p0_v4p0__6_1(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v2
-; GFX942-NEXT: v_mov_b32_e32 v11, v3
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v12, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1559,8 +1533,7 @@ define void @v_shuffle_v2p0_v4p0__0_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1643,8 +1616,7 @@ define void @v_shuffle_v2p0_v4p0__2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1688,8 +1660,7 @@ define void @v_shuffle_v2p0_v4p0__3_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1782,8 +1753,7 @@ define void @v_shuffle_v2p0_v4p0__5_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v4
-; GFX942-NEXT: v_mov_b32_e32 v11, v5
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1838,8 +1808,7 @@ define void @v_shuffle_v2p0_v4p0__6_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v4
-; GFX942-NEXT: v_mov_b32_e32 v13, v5
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v14, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1923,8 +1892,7 @@ define void @v_shuffle_v2p0_v4p0__0_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1968,8 +1936,7 @@ define void @v_shuffle_v2p0_v4p0__1_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2052,8 +2019,7 @@ define void @v_shuffle_v2p0_v4p0__3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2146,8 +2112,7 @@ define void @v_shuffle_v2p0_v4p0__5_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2202,8 +2167,7 @@ define void @v_shuffle_v2p0_v4p0__6_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v14, v6
-; GFX942-NEXT: v_mov_b32_e32 v15, v7
+; GFX942-NEXT: v_mov_b64_e32 v[14:15], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2376,8 +2340,7 @@ define void @v_shuffle_v2p0_v4p0__3_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v0, v6
-; GFX942-NEXT: v_mov_b32_e32 v1, v7
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2432,8 +2395,7 @@ define void @v_shuffle_v2p0_v4p0__5_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v0
-; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2478,8 +2440,7 @@ define void @v_shuffle_v2p0_v4p0__6_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v0
-; GFX942-NEXT: v_mov_b32_e32 v7, v1
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[0:1]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2574,8 +2535,7 @@ define void @v_shuffle_v2p0_v4p0__0_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2630,8 +2590,7 @@ define void @v_shuffle_v2p0_v4p0__1_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2686,8 +2645,7 @@ define void @v_shuffle_v2p0_v4p0__2_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v8
-; GFX942-NEXT: v_mov_b32_e32 v7, v9
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2742,8 +2700,7 @@ define void @v_shuffle_v2p0_v4p0__3_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v8, v6
-; GFX942-NEXT: v_mov_b32_e32 v9, v7
+; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2828,8 +2785,7 @@ define void @v_shuffle_v2p0_v4p0__5_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v2
-; GFX942-NEXT: v_mov_b32_e32 v5, v3
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2874,8 +2830,7 @@ define void @v_shuffle_v2p0_v4p0__6_5(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v2
-; GFX942-NEXT: v_mov_b32_e32 v7, v3
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[2:3]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2970,8 +2925,7 @@ define void @v_shuffle_v2p0_v4p0__0_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3026,8 +2980,7 @@ define void @v_shuffle_v2p0_v4p0__1_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v8
-; GFX942-NEXT: v_mov_b32_e32 v5, v9
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3082,8 +3035,7 @@ define void @v_shuffle_v2p0_v4p0__2_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v10
-; GFX942-NEXT: v_mov_b32_e32 v7, v11
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3138,8 +3090,7 @@ define void @v_shuffle_v2p0_v4p0__3_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v10, v6
-; GFX942-NEXT: v_mov_b32_e32 v11, v7
+; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[10:13], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3184,8 +3135,7 @@ define void @v_shuffle_v2p0_v4p0__4_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v4
-; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3270,8 +3220,7 @@ define void @v_shuffle_v2p0_v4p0__6_6(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v4
-; GFX942-NEXT: v_mov_b32_e32 v7, v5
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3366,8 +3315,7 @@ define void @v_shuffle_v2p0_v4p0__0_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[8:9]
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3422,8 +3370,7 @@ define void @v_shuffle_v2p0_v4p0__1_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v10
-; GFX942-NEXT: v_mov_b32_e32 v5, v11
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[10:11]
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3478,8 +3425,7 @@ define void @v_shuffle_v2p0_v4p0__2_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v6, v12
-; GFX942-NEXT: v_mov_b32_e32 v7, v13
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[12:13]
; GFX942-NEXT: global_store_dwordx4 v14, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3534,8 +3480,7 @@ define void @v_shuffle_v2p0_v4p0__3_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v12, v6
-; GFX942-NEXT: v_mov_b32_e32 v13, v7
+; GFX942-NEXT: v_mov_b64_e32 v[12:13], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3580,8 +3525,7 @@ define void @v_shuffle_v2p0_v4p0__4_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: v_mov_b64_e32 v[2:3], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3626,8 +3570,7 @@ define void @v_shuffle_v2p0_v4p0__5_7(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v4, v6
-; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], v[6:7]
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -3765,8 +3708,7 @@ define void @s_shuffle_v2p0_v4p0__1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3850,8 +3792,7 @@ define void @s_shuffle_v2p0_v4p0__3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3850,7 @@ define void @s_shuffle_v2p0_v4p0__5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -3996,8 +3936,7 @@ define void @s_shuffle_v2p0_v4p0__7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4056,10 +3995,8 @@ define void @s_shuffle_v2p0_v4p0__7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4113,8 +4050,7 @@ define void @s_shuffle_v2p0_v4p0__7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4172,10 +4108,8 @@ define void @s_shuffle_v2p0_v4p0__7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4230,8 +4164,7 @@ define void @s_shuffle_v2p0_v4p0__7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4280,10 +4213,8 @@ define void @s_shuffle_v2p0_v4p0__7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4296,18 +4227,43 @@ define void @s_shuffle_v2p0_v4p0__7_4() {
}
define void @s_shuffle_v2p0_v4p0__7_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 7, i32 5>
@@ -4352,10 +4308,8 @@ define void @s_shuffle_v2p0_v4p0__7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4368,18 +4322,43 @@ define void @s_shuffle_v2p0_v4p0__7_6() {
}
define void @s_shuffle_v2p0_v4p0__7_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 7, i32 7>
@@ -4420,8 +4399,7 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4433,18 +4411,43 @@ define void @s_shuffle_v2p0_v4p0__u_0() {
}
define void @s_shuffle_v2p0_v4p0__0_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4488,10 +4491,8 @@ define void @s_shuffle_v2p0_v4p0__1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4503,18 +4504,43 @@ define void @s_shuffle_v2p0_v4p0__1_0() {
}
define void @s_shuffle_v2p0_v4p0__2_0() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4558,10 +4584,8 @@ define void @s_shuffle_v2p0_v4p0__3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4605,8 +4629,7 @@ define void @s_shuffle_v2p0_v4p0__4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4664,10 +4687,8 @@ define void @s_shuffle_v2p0_v4p0__5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4722,8 +4743,7 @@ define void @s_shuffle_v2p0_v4p0__6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -4816,18 +4836,43 @@ define void @s_shuffle_v2p0_v4p0__0_1() {
}
define void @s_shuffle_v2p0_v4p0__1_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4835,18 +4880,43 @@ define void @s_shuffle_v2p0_v4p0__1_1() {
}
define void @s_shuffle_v2p0_v4p0__2_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4854,18 +4924,43 @@ define void @s_shuffle_v2p0_v4p0__2_1() {
}
define void @s_shuffle_v2p0_v4p0__3_1() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -4954,8 +5049,7 @@ define void @s_shuffle_v2p0_v4p0__5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5010,8 +5104,7 @@ define void @s_shuffle_v2p0_v4p0__6_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5056,8 +5149,7 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5069,18 +5161,43 @@ define void @s_shuffle_v2p0_v4p0__u_2() {
}
define void @s_shuffle_v2p0_v4p0__0_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5124,10 +5241,8 @@ define void @s_shuffle_v2p0_v4p0__1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5139,18 +5254,43 @@ define void @s_shuffle_v2p0_v4p0__1_2() {
}
define void @s_shuffle_v2p0_v4p0__2_2() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5194,10 +5334,8 @@ define void @s_shuffle_v2p0_v4p0__3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5241,8 +5379,7 @@ define void @s_shuffle_v2p0_v4p0__4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5299,10 +5436,8 @@ define void @s_shuffle_v2p0_v4p0__5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5357,8 +5492,7 @@ define void @s_shuffle_v2p0_v4p0__6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5411,18 +5545,43 @@ define void @s_shuffle_v2p0_v4p0__u_3() {
}
define void @s_shuffle_v2p0_v4p0__0_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5430,18 +5589,43 @@ define void @s_shuffle_v2p0_v4p0__0_3() {
}
define void @s_shuffle_v2p0_v4p0__1_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__1_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__1_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5489,18 +5673,43 @@ define void @s_shuffle_v2p0_v4p0__2_3() {
}
define void @s_shuffle_v2p0_v4p0__3_3() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <2 x i32> <i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:11]}"(<2 x ptr> %shuf)
@@ -5590,8 +5799,7 @@ define void @s_shuffle_v2p0_v4p0__5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5646,8 +5854,7 @@ define void @s_shuffle_v2p0_v4p0__6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5746,8 +5953,7 @@ define void @s_shuffle_v2p0_v4p0__1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5831,8 +6037,7 @@ define void @s_shuffle_v2p0_v4p0__3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5894,10 +6099,8 @@ define void @s_shuffle_v2p0_v4p0__5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -5910,18 +6113,43 @@ define void @s_shuffle_v2p0_v4p0__5_4() {
}
define void @s_shuffle_v2p0_v4p0__6_4() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s4
-; GFX9-NEXT: s_mov_b32 s11, s5
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 4>
@@ -6012,8 +6240,7 @@ define void @s_shuffle_v2p0_v4p0__0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6067,8 +6294,7 @@ define void @s_shuffle_v2p0_v4p0__1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6123,8 +6349,7 @@ define void @s_shuffle_v2p0_v4p0__2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6178,8 +6403,7 @@ define void @s_shuffle_v2p0_v4p0__3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6233,18 +6457,43 @@ define void @s_shuffle_v2p0_v4p0__4_5() {
}
define void @s_shuffle_v2p0_v4p0__5_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 5, i32 5>
@@ -6253,18 +6502,43 @@ define void @s_shuffle_v2p0_v4p0__5_5() {
}
define void @s_shuffle_v2p0_v4p0__6_5() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s6
-; GFX9-NEXT: s_mov_b32 s11, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 5>
@@ -6305,8 +6579,7 @@ define void @s_shuffle_v2p0_v4p0__u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6360,8 +6633,7 @@ define void @s_shuffle_v2p0_v4p0__0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6419,10 +6691,8 @@ define void @s_shuffle_v2p0_v4p0__1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6477,8 +6747,7 @@ define void @s_shuffle_v2p0_v4p0__2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6536,10 +6805,8 @@ define void @s_shuffle_v2p0_v4p0__3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,18 +6819,43 @@ define void @s_shuffle_v2p0_v4p0__3_6() {
}
define void @s_shuffle_v2p0_v4p0__4_6() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 4, i32 6>
@@ -6608,10 +6900,8 @@ define void @s_shuffle_v2p0_v4p0__5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6624,18 +6914,43 @@ define void @s_shuffle_v2p0_v4p0__5_6() {
}
define void @s_shuffle_v2p0_v4p0__6_6() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 6, i32 6>
@@ -6726,8 +7041,7 @@ define void @s_shuffle_v2p0_v4p0__0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6782,8 +7096,7 @@ define void @s_shuffle_v2p0_v4p0__1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6838,8 +7151,7 @@ define void @s_shuffle_v2p0_v4p0__2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6894,8 +7206,7 @@ define void @s_shuffle_v2p0_v4p0__3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:11]
; GFX942-NEXT: ;;#ASMEND
@@ -6908,18 +7219,43 @@ define void @s_shuffle_v2p0_v4p0__3_7() {
}
define void @s_shuffle_v2p0_v4p0__4_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__4_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__4_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 4, i32 7>
@@ -6928,18 +7264,43 @@ define void @s_shuffle_v2p0_v4p0__4_7() {
}
define void @s_shuffle_v2p0_v4p0__5_7() {
-; GFX9-LABEL: s_shuffle_v2p0_v4p0__5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[4:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s6
-; GFX9-NEXT: s_mov_b32 s9, s7
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v2p0_v4p0__5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <2 x i32> <i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
index a15fc3212f474..1b3cf9c6eba0f 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v2i64.ll
@@ -45,6 +45,7 @@ define void @v_shuffle_v3i64_v2i64__0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -84,10 +85,11 @@ define void @v_shuffle_v3i64_v2i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -140,10 +142,11 @@ define void @v_shuffle_v3i64_v2i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -203,7 +206,7 @@ define void @v_shuffle_v3i64_v2i64__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -254,14 +257,14 @@ define void @v_shuffle_v3i64_v2i64__3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -308,7 +311,7 @@ define void @v_shuffle_v3i64_v2i64__3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -351,10 +354,11 @@ define void @v_shuffle_v3i64_v2i64__3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -1472,6 +1476,7 @@ define void @v_shuffle_v3i64_v2i64__0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1511,10 +1516,11 @@ define void @v_shuffle_v3i64_v2i64__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v2i64__1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -2266,8 +2272,7 @@ define void @s_shuffle_v3i64_v2i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2325,8 +2330,7 @@ define void @s_shuffle_v3i64_v2i64__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2384,10 +2388,8 @@ define void @s_shuffle_v3i64_v2i64__3_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2441,8 +2443,7 @@ define void @s_shuffle_v3i64_v2i64__3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2491,10 +2492,8 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2507,18 +2506,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_u() {
}
define void @s_shuffle_v3i64_v2i64__3_3_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
@@ -2572,10 +2596,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2633,10 +2655,8 @@ define void @s_shuffle_v3i64_v2i64__3_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2689,12 +2709,9 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,20 +2724,48 @@ define void @s_shuffle_v3i64_v2i64__3_3_2() {
}
define void @s_shuffle_v3i64_v2i64__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
@@ -2765,10 +2810,8 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2780,20 +2823,48 @@ define void @s_shuffle_v3i64_v2i64__u_0_0() {
}
define void @s_shuffle_v3i64_v2i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -2841,12 +2912,9 @@ define void @s_shuffle_v3i64_v2i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2894,10 +2962,8 @@ define void @s_shuffle_v3i64_v2i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2958,12 +3024,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3021,10 +3084,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3086,12 +3147,9 @@ define void @s_shuffle_v3i64_v2i64__3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3153,12 +3211,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3171,18 +3226,43 @@ define void @s_shuffle_v3i64_v2i64__3_2_0() {
}
define void @s_shuffle_v3i64_v2i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3190,18 +3270,43 @@ define void @s_shuffle_v3i64_v2i64__u_1_1() {
}
define void @s_shuffle_v3i64_v2i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__0_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3209,20 +3314,48 @@ define void @s_shuffle_v3i64_v2i64__0_1_1() {
}
define void @s_shuffle_v3i64_v2i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3230,18 +3363,43 @@ define void @s_shuffle_v3i64_v2i64__1_1_1() {
}
define void @s_shuffle_v3i64_v2i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -3294,10 +3452,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3355,10 +3511,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3420,12 +3574,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3487,12 +3638,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3591,8 +3739,7 @@ define void @s_shuffle_v3i64_v2i64__1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3658,12 +3805,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3712,10 +3856,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3773,12 +3915,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3836,10 +3975,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,18 +3989,43 @@ define void @s_shuffle_v3i64_v2i64__3_1_2() {
}
define void @s_shuffle_v3i64_v2i64__u_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
@@ -3917,10 +4079,8 @@ define void @s_shuffle_v3i64_v2i64__0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3978,10 +4138,8 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3994,18 +4152,43 @@ define void @s_shuffle_v3i64_v2i64__1_3_3() {
}
define void @s_shuffle_v3i64_v2i64__2_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v2i64__2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
@@ -4050,10 +4233,8 @@ define void @s_shuffle_v3i64_v2i64__3_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4115,12 +4296,9 @@ define void @s_shuffle_v3i64_v2i64__3_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4178,10 +4356,8 @@ define void @s_shuffle_v3i64_v2i64__3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4234,12 +4410,9 @@ define void @s_shuffle_v3i64_v2i64__3_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
index f15dd7d2772e5..0ff4daf6feb33 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v3i64.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v3i64_v3i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,10 +127,11 @@ define void @v_shuffle_v3i64_v3i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -182,6 +184,7 @@ define void @v_shuffle_v3i64_v3i64__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -222,10 +225,11 @@ define void @v_shuffle_v3i64_v3i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -285,7 +289,7 @@ define void @v_shuffle_v3i64_v3i64__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -336,10 +340,11 @@ define void @v_shuffle_v3i64_v3i64__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
@@ -399,7 +404,7 @@ define void @v_shuffle_v3i64_v3i64__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v10
; GFX942-NEXT: v_mov_b32_e32 v3, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
@@ -454,6 +459,7 @@ define void @v_shuffle_v3i64_v3i64__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -494,10 +500,11 @@ define void @v_shuffle_v3i64_v3i64__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -546,6 +553,7 @@ define void @v_shuffle_v3i64_v3i64__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2902,6 +2910,7 @@ define void @v_shuffle_v3i64_v3i64__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2941,10 +2950,11 @@ define void @v_shuffle_v3i64_v3i64__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v3i64__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -4720,8 +4730,7 @@ define void @s_shuffle_v3i64_v3i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4761,8 +4770,7 @@ define void @s_shuffle_v3i64_v3i64__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4820,8 +4828,7 @@ define void @s_shuffle_v3i64_v3i64__4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4862,8 +4869,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4918,11 +4924,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4972,8 +4978,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5027,10 +5032,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5075,10 +5078,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5091,18 +5092,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_u() {
}
define void @s_shuffle_v3i64_v3i64__5_4_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 poison>
@@ -5147,10 +5173,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5212,12 +5236,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5279,12 +5300,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5342,10 +5360,8 @@ define void @s_shuffle_v3i64_v3i64__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5394,12 +5410,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5452,12 +5465,9 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5470,20 +5480,48 @@ define void @s_shuffle_v3i64_v3i64__5_5_4() {
}
define void @s_shuffle_v3i64_v3i64__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
@@ -5528,10 +5566,8 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5543,20 +5579,48 @@ define void @s_shuffle_v3i64_v3i64__u_0_0() {
}
define void @s_shuffle_v3i64_v3i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -5604,12 +5668,9 @@ define void @s_shuffle_v3i64_v3i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5657,12 +5718,9 @@ define void @s_shuffle_v3i64_v3i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5710,10 +5768,8 @@ define void @s_shuffle_v3i64_v3i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5771,15 +5827,12 @@ define void @s_shuffle_v3i64_v3i64__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5838,13 +5891,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5899,11 +5950,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5962,13 +6013,11 @@ define void @s_shuffle_v3i64_v3i64__5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6022,12 +6071,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6086,13 +6132,11 @@ define void @s_shuffle_v3i64_v3i64__5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6150,10 +6194,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6166,116 +6208,12 @@ define void @s_shuffle_v3i64_v3i64__5_4_0() {
}
define void @s_shuffle_v3i64_v3i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__4_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6283,17 +6221,12 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6301,40 +6234,30 @@ define void @s_shuffle_v3i64_v3i64__4_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+define void @s_shuffle_v3i64_v3i64__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6342,15 +6265,12 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6358,19 +6278,268 @@ define void @s_shuffle_v3i64_v3i64__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6425,11 +6594,11 @@ define void @s_shuffle_v3i64_v3i64__5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6488,13 +6657,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,12 +6719,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6577,217 +6741,344 @@ define void @s_shuffle_v3i64_v3i64__5_3_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__5_4_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %vec1 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v3i64__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_3_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v3i64__5_4_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+define void @s_shuffle_v3i64_v3i64__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_1:
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %vec1 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v3i64__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -6840,10 +7131,8 @@ define void @s_shuffle_v3i64_v3i64__4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6897,10 +7186,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6950,8 +7237,7 @@ define void @s_shuffle_v3i64_v3i64__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7009,12 +7295,9 @@ define void @s_shuffle_v3i64_v3i64__5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7064,8 +7347,7 @@ define void @s_shuffle_v3i64_v3i64__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7119,10 +7401,8 @@ define void @s_shuffle_v3i64_v3i64__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7180,10 +7460,8 @@ define void @s_shuffle_v3i64_v3i64__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7282,8 +7560,7 @@ define void @s_shuffle_v3i64_v3i64__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7323,8 +7600,7 @@ define void @s_shuffle_v3i64_v3i64__2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7390,12 +7666,9 @@ define void @s_shuffle_v3i64_v3i64__4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7444,12 +7717,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7494,10 +7764,8 @@ define void @s_shuffle_v3i64_v3i64__5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7552,13 +7820,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7612,10 +7878,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7673,10 +7937,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7725,12 +7987,9 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7743,18 +8002,43 @@ define void @s_shuffle_v3i64_v3i64__5_4_3() {
}
define void @s_shuffle_v3i64_v3i64__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 4, i32 4>
@@ -7808,10 +8092,8 @@ define void @s_shuffle_v3i64_v3i64__0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7869,10 +8151,8 @@ define void @s_shuffle_v3i64_v3i64__1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7930,10 +8210,8 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7946,18 +8224,43 @@ define void @s_shuffle_v3i64_v3i64__2_4_4() {
}
define void @s_shuffle_v3i64_v3i64__3_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 4, i32 4>
@@ -7966,20 +8269,48 @@ define void @s_shuffle_v3i64_v3i64__3_4_4() {
}
define void @s_shuffle_v3i64_v3i64__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 4, i32 4>
@@ -7988,20 +8319,48 @@ define void @s_shuffle_v3i64_v3i64__4_4_4() {
}
define void @s_shuffle_v3i64_v3i64__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
@@ -8042,10 +8401,8 @@ define void @s_shuffle_v3i64_v3i64__5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8104,13 +8461,11 @@ define void @s_shuffle_v3i64_v3i64__5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8164,10 +8519,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8225,12 +8578,9 @@ define void @s_shuffle_v3i64_v3i64__5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8279,12 +8629,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8297,18 +8644,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_4() {
}
define void @s_shuffle_v3i64_v3i64__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
@@ -8362,10 +8734,8 @@ define void @s_shuffle_v3i64_v3i64__0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8423,10 +8793,8 @@ define void @s_shuffle_v3i64_v3i64__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8484,10 +8852,8 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,18 +8866,43 @@ define void @s_shuffle_v3i64_v3i64__2_5_5() {
}
define void @s_shuffle_v3i64_v3i64__3_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__3_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
@@ -8520,20 +8911,48 @@ define void @s_shuffle_v3i64_v3i64__3_5_5() {
}
define void @s_shuffle_v3i64_v3i64__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
@@ -8542,18 +8961,43 @@ define void @s_shuffle_v3i64_v3i64__4_5_5() {
}
define void @s_shuffle_v3i64_v3i64__5_u_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_u_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 poison, i32 5>
@@ -8607,10 +9051,8 @@ define void @s_shuffle_v3i64_v3i64__5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8668,10 +9110,8 @@ define void @s_shuffle_v3i64_v3i64__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8729,10 +9169,8 @@ define void @s_shuffle_v3i64_v3i64__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8785,12 +9223,9 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8803,18 +9238,43 @@ define void @s_shuffle_v3i64_v3i64__5_3_5() {
}
define void @s_shuffle_v3i64_v3i64__5_4_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v3i64__5_4_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
index 6e156d2d4a2f5..58f1e346b6503 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3i64.v4i64.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v3i64_v4i64__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,6 +127,7 @@ define void @v_shuffle_v3i64_v4i64__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -165,10 +167,11 @@ define void @v_shuffle_v3i64_v4i64__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -221,6 +224,7 @@ define void @v_shuffle_v3i64_v4i64__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -261,6 +265,7 @@ define void @v_shuffle_v3i64_v4i64__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -301,10 +306,11 @@ define void @v_shuffle_v3i64_v4i64__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -364,7 +370,7 @@ define void @v_shuffle_v3i64_v4i64__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v8
; GFX942-NEXT: v_mov_b32_e32 v3, v9
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -415,10 +421,11 @@ define void @v_shuffle_v3i64_v4i64__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
@@ -478,7 +485,7 @@ define void @v_shuffle_v3i64_v4i64__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v12
; GFX942-NEXT: v_mov_b32_e32 v3, v13
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
@@ -534,7 +541,7 @@ define void @v_shuffle_v3i64_v4i64__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v14
; GFX942-NEXT: v_mov_b32_e32 v5, v15
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
@@ -589,6 +596,7 @@ define void @v_shuffle_v3i64_v4i64__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -629,10 +637,11 @@ define void @v_shuffle_v3i64_v4i64__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -681,6 +690,7 @@ define void @v_shuffle_v3i64_v4i64__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -727,6 +737,7 @@ define void @v_shuffle_v3i64_v4i64__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4719,6 +4730,7 @@ define void @v_shuffle_v3i64_v4i64__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4758,6 +4770,7 @@ define void @v_shuffle_v3i64_v4i64__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4797,10 +4810,11 @@ define void @v_shuffle_v3i64_v4i64__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -8016,8 +8030,7 @@ define void @s_shuffle_v3i64_v4i64__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8057,8 +8070,7 @@ define void @s_shuffle_v3i64_v4i64__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8102,8 +8114,7 @@ define void @s_shuffle_v3i64_v4i64__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8161,8 +8172,7 @@ define void @s_shuffle_v3i64_v4i64__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8203,8 +8213,7 @@ define void @s_shuffle_v3i64_v4i64__6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8249,8 +8258,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8309,10 +8317,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8366,8 +8372,7 @@ define void @s_shuffle_v3i64_v4i64__7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8425,10 +8430,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8482,10 +8485,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8534,10 +8535,8 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,18 +8549,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_u() {
}
define void @s_shuffle_v3i64_v4i64__7_5_u() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
@@ -8606,10 +8630,8 @@ define void @s_shuffle_v3i64_v4i64__7_6_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8654,10 +8676,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8716,13 +8736,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8781,13 +8799,11 @@ define void @s_shuffle_v3i64_v4i64__7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8841,10 +8857,8 @@ define void @s_shuffle_v3i64_v4i64__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8902,12 +8916,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8956,12 +8967,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9010,12 +9018,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9028,20 +9033,48 @@ define void @s_shuffle_v3i64_v4i64__7_7_5() {
}
define void @s_shuffle_v3i64_v4i64__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_7_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
@@ -9090,12 +9123,9 @@ define void @s_shuffle_v3i64_v4i64__7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9144,10 +9174,8 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9159,20 +9187,48 @@ define void @s_shuffle_v3i64_v4i64__u_0_0() {
}
define void @s_shuffle_v3i64_v4i64__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
@@ -9220,12 +9276,9 @@ define void @s_shuffle_v3i64_v4i64__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9273,12 +9326,9 @@ define void @s_shuffle_v3i64_v4i64__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9330,12 +9380,9 @@ define void @s_shuffle_v3i64_v4i64__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9383,10 +9430,8 @@ define void @s_shuffle_v3i64_v4i64__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9444,15 +9489,12 @@ define void @s_shuffle_v3i64_v4i64__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9511,13 +9553,11 @@ define void @s_shuffle_v3i64_v4i64__6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9576,15 +9616,12 @@ define void @s_shuffle_v3i64_v4i64__7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9639,13 +9676,11 @@ define void @s_shuffle_v3i64_v4i64__7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9704,15 +9739,12 @@ define void @s_shuffle_v3i64_v4i64__7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9770,12 +9802,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9833,12 +9862,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9897,15 +9923,12 @@ define void @s_shuffle_v3i64_v4i64__7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9963,10 +9986,8 @@ define void @s_shuffle_v3i64_v4i64__7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10028,12 +10049,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10046,137 +10064,12 @@ define void @s_shuffle_v3i64_v4i64__7_6_0() {
}
define void @s_shuffle_v3i64_v4i64__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10184,17 +10077,12 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10202,42 +10090,30 @@ define void @s_shuffle_v3i64_v4i64__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+define void @s_shuffle_v3i64_v4i64__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10245,17 +10121,12 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10263,42 +10134,32 @@ define void @s_shuffle_v3i64_v4i64__6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+define void @s_shuffle_v3i64_v4i64__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10306,17 +10167,14 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10324,263 +10182,204 @@ define void @s_shuffle_v3i64_v4i64__7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+define void @s_shuffle_v3i64_v4i64__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+define void @s_shuffle_v3i64_v4i64__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+define void @s_shuffle_v3i64_v4i64__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+define void @s_shuffle_v3i64_v4i64__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10589,128 +10388,116 @@ define void @s_shuffle_v3i64_v4i64__7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+define void @s_shuffle_v3i64_v4i64__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+define void @s_shuffle_v3i64_v4i64__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10719,23 +10506,21 @@ define void @s_shuffle_v3i64_v4i64__7_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+define void @s_shuffle_v3i64_v4i64__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10746,8 +10531,6 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10755,7 +10538,7 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10766,8 +10549,6 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10775,190 +10556,134 @@ define void @s_shuffle_v3i64_v4i64__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3i64_v4i64__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+define void @s_shuffle_v3i64_v4i64__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10967,55 +10692,58 @@ define void @s_shuffle_v3i64_v4i64__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+define void @s_shuffle_v3i64_v4i64__7_3_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11024,238 +10752,1418 @@ define void @s_shuffle_v3i64_v4i64__6_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+define void @s_shuffle_v3i64_v4i64__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__4_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__1_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+define void @s_shuffle_v3i64_v4i64__3_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+define void @s_shuffle_v3i64_v4i64__4_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+define void @s_shuffle_v3i64_v4i64__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11264,21 +12172,22 @@ define void @s_shuffle_v3i64_v4i64__7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+define void @s_shuffle_v3i64_v4i64__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11287,16 +12196,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11305,16 +12214,16 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,23 +12232,22 @@ define void @s_shuffle_v3i64_v4i64__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+define void @s_shuffle_v3i64_v4i64__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11350,14 +12258,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11368,14 +12278,16 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11384,59 +12296,58 @@ define void @s_shuffle_v3i64_v4i64__7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+define void @s_shuffle_v3i64_v4i64__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11445,59 +12356,61 @@ define void @s_shuffle_v3i64_v4i64__7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+define void @s_shuffle_v3i64_v4i64__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_2:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,30 +12419,32 @@ define void @s_shuffle_v3i64_v4i64__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+define void @s_shuffle_v3i64_v4i64__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11537,14 +12452,17 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11552,58 +12470,42 @@ define void @s_shuffle_v3i64_v4i64__u_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__0_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+define void @s_shuffle_v3i64_v4i64__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11611,16 +12513,19 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11628,39 +12533,43 @@ define void @s_shuffle_v3i64_v4i64__1_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+define void @s_shuffle_v3i64_v4i64__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11668,16 +12577,19 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11685,94 +12597,102 @@ define void @s_shuffle_v3i64_v4i64__2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+define void @s_shuffle_v3i64_v4i64__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+define void @s_shuffle_v3i64_v4i64__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11780,14 +12700,19 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11795,649 +12720,531 @@ define void @s_shuffle_v3i64_v4i64__4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+define void @s_shuffle_v3i64_v4i64__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+define void @s_shuffle_v3i64_v4i64__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_3_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+define void @s_shuffle_v3i64_v4i64__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+define void @s_shuffle_v3i64_v4i64__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+define void @s_shuffle_v3i64_v4i64__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__5_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+define void @s_shuffle_v3i64_v4i64__6_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+define void @s_shuffle_v3i64_v4i64__7_4_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+define void @s_shuffle_v3i64_v4i64__7_u_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+define void @s_shuffle_v3i64_v4i64__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+define void @s_shuffle_v3i64_v4i64__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_3:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12446,831 +13253,783 @@ define void @s_shuffle_v3i64_v4i64__7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+define void @s_shuffle_v3i64_v4i64__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 0, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+define void @s_shuffle_v3i64_v4i64__7_3_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+define void @s_shuffle_v3i64_v4i64__7_5_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+define void @s_shuffle_v3i64_v4i64__7_6_4() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+define void @s_shuffle_v3i64_v4i64__u_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+define void @s_shuffle_v3i64_v4i64__0_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+define void @s_shuffle_v3i64_v4i64__1_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+define void @s_shuffle_v3i64_v4i64__2_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+define void @s_shuffle_v3i64_v4i64__3_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+define void @s_shuffle_v3i64_v4i64__4_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+define void @s_shuffle_v3i64_v4i64__5_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+define void @s_shuffle_v3i64_v4i64__6_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+define void @s_shuffle_v3i64_v4i64__7_5_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+define void @s_shuffle_v3i64_v4i64__7_u_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_4:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+define void @s_shuffle_v3i64_v4i64__7_0_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13278,17 +14037,19 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13296,68 +14057,68 @@ define void @s_shuffle_v3i64_v4i64__0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+define void @s_shuffle_v3i64_v4i64__7_1_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13366,231 +14127,145 @@ define void @s_shuffle_v3i64_v4i64__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+define void @s_shuffle_v3i64_v4i64__7_2_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_5_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+define void @s_shuffle_v3i64_v4i64__7_3_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_5_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_5_5() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_u_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+define void @s_shuffle_v3i64_v4i64__7_4_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13598,6 +14273,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13605,7 +14282,7 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13613,6 +14290,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13620,41 +14299,37 @@ define void @s_shuffle_v3i64_v4i64__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_0_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+define void @s_shuffle_v3i64_v4i64__7_6_5() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13662,19 +14337,16 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13682,35 +14354,73 @@ define void @s_shuffle_v3i64_v4i64__7_0_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_0_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3i64_v4i64__u_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_1_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+define void @s_shuffle_v3i64_v4i64__0_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13719,16 +14429,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13737,16 +14447,16 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_1_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13755,360 +14465,300 @@ define void @s_shuffle_v3i64_v4i64__7_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_2_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+define void @s_shuffle_v3i64_v4i64__1_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_2_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_3_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+define void @s_shuffle_v3i64_v4i64__2_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_3_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_4_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+define void @s_shuffle_v3i64_v4i64__3_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_4_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__7_6_5() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+define void @s_shuffle_v3i64_v4i64__4_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_5:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__u_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__0_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+define void @s_shuffle_v3i64_v4i64__5_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__0_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__1_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+define void @s_shuffle_v3i64_v4i64__6_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14116,17 +14766,14 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14134,42 +14781,34 @@ define void @s_shuffle_v3i64_v4i64__1_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__1_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__2_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+define void @s_shuffle_v3i64_v4i64__7_6_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14177,17 +14816,14 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14195,190 +14831,63 @@ define void @s_shuffle_v3i64_v4i64__2_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__2_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
ret void
}
-define void @s_shuffle_v3i64_v4i64__3_6_6() {
-; GFX900-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+define void @s_shuffle_v3i64_v4i64__7_u_6() {
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3i64_v4i64__3_6_6:
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__4_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__5_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__6_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_6_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3i64_v4i64__7_u_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_u_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
@@ -14432,10 +14941,8 @@ define void @s_shuffle_v3i64_v4i64__7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14493,10 +15000,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14554,10 +15059,8 @@ define void @s_shuffle_v3i64_v4i64__7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14615,10 +15118,8 @@ define void @s_shuffle_v3i64_v4i64__7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,12 +15172,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14689,18 +15187,43 @@ define void @s_shuffle_v3i64_v4i64__7_4_6() {
}
define void @s_shuffle_v3i64_v4i64__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
@@ -14745,10 +15268,8 @@ define void @s_shuffle_v3i64_v4i64__u_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14806,10 +15327,8 @@ define void @s_shuffle_v3i64_v4i64__0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14871,12 +15390,9 @@ define void @s_shuffle_v3i64_v4i64__1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14934,12 +15450,9 @@ define void @s_shuffle_v3i64_v4i64__2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,12 +15514,9 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15019,20 +15529,48 @@ define void @s_shuffle_v3i64_v4i64__3_7_7() {
}
define void @s_shuffle_v3i64_v4i64__4_7_7() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__4_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
@@ -15081,12 +15619,9 @@ define void @s_shuffle_v3i64_v4i64__5_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15139,12 +15674,9 @@ define void @s_shuffle_v3i64_v4i64__6_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15193,10 +15725,8 @@ define void @s_shuffle_v3i64_v4i64__7_u_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15258,12 +15788,9 @@ define void @s_shuffle_v3i64_v4i64__7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15321,10 +15848,8 @@ define void @s_shuffle_v3i64_v4i64__7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15386,12 +15911,9 @@ define void @s_shuffle_v3i64_v4i64__7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15449,12 +15971,9 @@ define void @s_shuffle_v3i64_v4i64__7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15507,12 +16026,9 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15525,20 +16041,48 @@ define void @s_shuffle_v3i64_v4i64__7_4_7() {
}
define void @s_shuffle_v3i64_v4i64__7_5_7() {
-; GFX9-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3i64_v4i64__7_5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
@@ -15587,12 +16131,9 @@ define void @s_shuffle_v3i64_v4i64__7_6_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
index fe132493ce536..9d557a317986d 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v2p0.ll
@@ -45,6 +45,7 @@ define void @v_shuffle_v3p0_v2p0__0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -84,10 +85,11 @@ define void @v_shuffle_v3p0_v2p0__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -140,10 +142,11 @@ define void @v_shuffle_v3p0_v2p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -203,7 +206,7 @@ define void @v_shuffle_v3p0_v2p0__3_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -254,14 +257,14 @@ define void @v_shuffle_v3p0_v2p0__3_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -308,7 +311,7 @@ define void @v_shuffle_v3p0_v2p0__3_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -351,10 +354,11 @@ define void @v_shuffle_v3p0_v2p0__3_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__3_3_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -1472,6 +1476,7 @@ define void @v_shuffle_v3p0_v2p0__0_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -1511,10 +1516,11 @@ define void @v_shuffle_v3p0_v2p0__1_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v2p0__1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -2266,8 +2272,7 @@ define void @s_shuffle_v3p0_v2p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2325,8 +2330,7 @@ define void @s_shuffle_v3p0_v2p0__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2384,10 +2388,8 @@ define void @s_shuffle_v3p0_v2p0__3_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2441,8 +2443,7 @@ define void @s_shuffle_v3p0_v2p0__3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2491,10 +2492,8 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2507,18 +2506,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_u() {
}
define void @s_shuffle_v3p0_v2p0__3_3_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 poison>
@@ -2572,10 +2596,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2633,10 +2655,8 @@ define void @s_shuffle_v3p0_v2p0__3_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2689,12 +2709,9 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2707,20 +2724,48 @@ define void @s_shuffle_v3p0_v2p0__3_3_2() {
}
define void @s_shuffle_v3p0_v2p0__3_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 3, i32 3, i32 3>
@@ -2765,10 +2810,8 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2780,20 +2823,48 @@ define void @s_shuffle_v3p0_v2p0__u_0_0() {
}
define void @s_shuffle_v3p0_v2p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -2841,12 +2912,9 @@ define void @s_shuffle_v3p0_v2p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2894,10 +2962,8 @@ define void @s_shuffle_v3p0_v2p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -2958,12 +3024,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3021,10 +3084,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3086,12 +3147,9 @@ define void @s_shuffle_v3p0_v2p0__3_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3153,12 +3211,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3171,18 +3226,43 @@ define void @s_shuffle_v3p0_v2p0__3_2_0() {
}
define void @s_shuffle_v3p0_v2p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3190,18 +3270,43 @@ define void @s_shuffle_v3p0_v2p0__u_1_1() {
}
define void @s_shuffle_v3p0_v2p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__0_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3209,20 +3314,48 @@ define void @s_shuffle_v3p0_v2p0__0_1_1() {
}
define void @s_shuffle_v3p0_v2p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3230,18 +3363,43 @@ define void @s_shuffle_v3p0_v2p0__1_1_1() {
}
define void @s_shuffle_v3p0_v2p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -3294,10 +3452,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3355,10 +3511,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3420,12 +3574,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3487,12 +3638,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3591,8 +3739,7 @@ define void @s_shuffle_v3p0_v2p0__1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3658,12 +3805,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3712,10 +3856,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3773,12 +3915,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3836,10 +3975,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,18 +3989,43 @@ define void @s_shuffle_v3p0_v2p0__3_1_2() {
}
define void @s_shuffle_v3p0_v2p0__u_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 poison, i32 3, i32 3>
@@ -3917,10 +4079,8 @@ define void @s_shuffle_v3p0_v2p0__0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3978,10 +4138,8 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -3994,18 +4152,43 @@ define void @s_shuffle_v3p0_v2p0__1_3_3() {
}
define void @s_shuffle_v3p0_v2p0__2_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v2p0__2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <3 x i32> <i32 2, i32 3, i32 3>
@@ -4050,10 +4233,8 @@ define void @s_shuffle_v3p0_v2p0__3_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4115,12 +4296,9 @@ define void @s_shuffle_v3p0_v2p0__3_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4178,10 +4356,8 @@ define void @s_shuffle_v3p0_v2p0__3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4234,12 +4410,9 @@ define void @s_shuffle_v3p0_v2p0__3_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
index b6f4e3091b61f..84cc0535469f3 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v3p0.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v3p0_v3p0__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,10 +127,11 @@ define void @v_shuffle_v3p0_v3p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -182,6 +184,7 @@ define void @v_shuffle_v3p0_v3p0__4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -222,10 +225,11 @@ define void @v_shuffle_v3p0_v3p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -285,7 +289,7 @@ define void @v_shuffle_v3p0_v3p0__5_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -336,10 +340,11 @@ define void @v_shuffle_v3p0_v3p0__5_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
@@ -399,7 +404,7 @@ define void @v_shuffle_v3p0_v3p0__5_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v10
; GFX942-NEXT: v_mov_b32_e32 v3, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
@@ -454,6 +459,7 @@ define void @v_shuffle_v3p0_v3p0__5_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -494,10 +500,11 @@ define void @v_shuffle_v3p0_v3p0__5_4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__5_4_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -546,6 +553,7 @@ define void @v_shuffle_v3p0_v3p0__5_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2902,6 +2910,7 @@ define void @v_shuffle_v3p0_v3p0__1_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2941,10 +2950,11 @@ define void @v_shuffle_v3p0_v3p0__2_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v3p0__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx2 v6, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -4720,8 +4730,7 @@ define void @s_shuffle_v3p0_v3p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4761,8 +4770,7 @@ define void @s_shuffle_v3p0_v3p0__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4820,8 +4828,7 @@ define void @s_shuffle_v3p0_v3p0__4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4862,8 +4869,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4918,11 +4924,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -4972,8 +4978,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5027,10 +5032,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5075,10 +5078,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5091,18 +5092,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_u() {
}
define void @s_shuffle_v3p0_v3p0__5_4_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 poison>
@@ -5147,10 +5173,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5212,12 +5236,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5279,12 +5300,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5342,10 +5360,8 @@ define void @s_shuffle_v3p0_v3p0__5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5394,12 +5410,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5452,12 +5465,9 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5470,20 +5480,48 @@ define void @s_shuffle_v3p0_v3p0__5_5_4() {
}
define void @s_shuffle_v3p0_v3p0__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
@@ -5528,10 +5566,8 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5543,20 +5579,48 @@ define void @s_shuffle_v3p0_v3p0__u_0_0() {
}
define void @s_shuffle_v3p0_v3p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -5604,12 +5668,9 @@ define void @s_shuffle_v3p0_v3p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5657,12 +5718,9 @@ define void @s_shuffle_v3p0_v3p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5710,10 +5768,8 @@ define void @s_shuffle_v3p0_v3p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5771,15 +5827,12 @@ define void @s_shuffle_v3p0_v3p0__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5838,13 +5891,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5899,11 +5950,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -5962,13 +6013,11 @@ define void @s_shuffle_v3p0_v3p0__5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6022,12 +6071,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6086,13 +6132,11 @@ define void @s_shuffle_v3p0_v3p0__5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6150,10 +6194,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6166,116 +6208,12 @@ define void @s_shuffle_v3p0_v3p0__5_4_0() {
}
define void @s_shuffle_v3p0_v3p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__4_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6283,17 +6221,12 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6301,40 +6234,30 @@ define void @s_shuffle_v3p0_v3p0__4_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+define void @s_shuffle_v3p0_v3p0__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -6342,15 +6265,12 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -6358,19 +6278,268 @@ define void @s_shuffle_v3p0_v3p0__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6425,11 +6594,11 @@ define void @s_shuffle_v3p0_v3p0__5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6488,13 +6657,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6552,12 +6719,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6577,217 +6741,344 @@ define void @s_shuffle_v3p0_v3p0__5_3_1() {
; GFX900-NEXT: ; def s[4:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ; def s[12:17]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__5_4_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:9]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:9]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:5]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v3p0__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:17]
+; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_3_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:9]
+; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 1>
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v3p0__5_4_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+define void @s_shuffle_v3p0_v3p0__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:9]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:13]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:9]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:13]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_1:
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:5]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %vec1 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v3p0__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -6840,10 +7131,8 @@ define void @s_shuffle_v3p0_v3p0__4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6897,10 +7186,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -6950,8 +7237,7 @@ define void @s_shuffle_v3p0_v3p0__5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7009,12 +7295,9 @@ define void @s_shuffle_v3p0_v3p0__5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7064,8 +7347,7 @@ define void @s_shuffle_v3p0_v3p0__5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7119,10 +7401,8 @@ define void @s_shuffle_v3p0_v3p0__5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7180,10 +7460,8 @@ define void @s_shuffle_v3p0_v3p0__5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7282,8 +7560,7 @@ define void @s_shuffle_v3p0_v3p0__1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7323,8 +7600,7 @@ define void @s_shuffle_v3p0_v3p0__2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7390,12 +7666,9 @@ define void @s_shuffle_v3p0_v3p0__4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7444,12 +7717,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7494,10 +7764,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7552,13 +7820,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7612,10 +7878,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7673,10 +7937,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:17]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7725,12 +7987,9 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7743,18 +8002,43 @@ define void @s_shuffle_v3p0_v3p0__5_4_3() {
}
define void @s_shuffle_v3p0_v3p0__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 poison, i32 4, i32 4>
@@ -7808,10 +8092,8 @@ define void @s_shuffle_v3p0_v3p0__0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7869,10 +8151,8 @@ define void @s_shuffle_v3p0_v3p0__1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7930,10 +8210,8 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -7946,18 +8224,43 @@ define void @s_shuffle_v3p0_v3p0__2_4_4() {
}
define void @s_shuffle_v3p0_v3p0__3_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 3, i32 4, i32 4>
@@ -7966,20 +8269,48 @@ define void @s_shuffle_v3p0_v3p0__3_4_4() {
}
define void @s_shuffle_v3p0_v3p0__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 4, i32 4>
@@ -7988,20 +8319,48 @@ define void @s_shuffle_v3p0_v3p0__4_4_4() {
}
define void @s_shuffle_v3p0_v3p0__5_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
@@ -8042,10 +8401,8 @@ define void @s_shuffle_v3p0_v3p0__5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8104,13 +8461,11 @@ define void @s_shuffle_v3p0_v3p0__5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8164,10 +8519,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8225,12 +8578,9 @@ define void @s_shuffle_v3p0_v3p0__5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8279,12 +8629,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8297,18 +8644,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_4() {
}
define void @s_shuffle_v3p0_v3p0__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
@@ -8362,10 +8734,8 @@ define void @s_shuffle_v3p0_v3p0__0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8423,10 +8793,8 @@ define void @s_shuffle_v3p0_v3p0__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8484,10 +8852,8 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8500,18 +8866,43 @@ define void @s_shuffle_v3p0_v3p0__2_5_5() {
}
define void @s_shuffle_v3p0_v3p0__3_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__3_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
@@ -8520,20 +8911,48 @@ define void @s_shuffle_v3p0_v3p0__3_5_5() {
}
define void @s_shuffle_v3p0_v3p0__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
@@ -8542,18 +8961,43 @@ define void @s_shuffle_v3p0_v3p0__4_5_5() {
}
define void @s_shuffle_v3p0_v3p0__5_u_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_u_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 poison, i32 5>
@@ -8607,10 +9051,8 @@ define void @s_shuffle_v3p0_v3p0__5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8668,10 +9110,8 @@ define void @s_shuffle_v3p0_v3p0__5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8729,10 +9169,8 @@ define void @s_shuffle_v3p0_v3p0__5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8785,12 +9223,9 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8803,18 +9238,43 @@ define void @s_shuffle_v3p0_v3p0__5_3_5() {
}
define void @s_shuffle_v3p0_v3p0__5_4_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v3p0__5_4_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 5>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
index b03066e66cf66..3e9a69b3de166 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v3p0.v4p0.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v3p0_v4p0__1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,6 +127,7 @@ define void @v_shuffle_v3p0_v4p0__2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -165,10 +167,11 @@ define void @v_shuffle_v3p0_v4p0__3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -221,6 +224,7 @@ define void @v_shuffle_v3p0_v4p0__5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -261,6 +265,7 @@ define void @v_shuffle_v3p0_v4p0__6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -301,10 +306,11 @@ define void @v_shuffle_v3p0_v4p0__7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -364,7 +370,7 @@ define void @v_shuffle_v3p0_v4p0__7_0_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v10, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v8
; GFX942-NEXT: v_mov_b32_e32 v3, v9
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -415,10 +421,11 @@ define void @v_shuffle_v3p0_v4p0__7_1_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_1_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx2 v12, v[0:1], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
@@ -478,7 +485,7 @@ define void @v_shuffle_v3p0_v4p0__7_2_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v14, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v2, v12
; GFX942-NEXT: v_mov_b32_e32 v3, v13
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
@@ -534,7 +541,7 @@ define void @v_shuffle_v3p0_v4p0__7_3_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx2 v16, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v14
; GFX942-NEXT: v_mov_b32_e32 v5, v15
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
@@ -589,6 +596,7 @@ define void @v_shuffle_v3p0_v4p0__7_4_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -629,10 +637,11 @@ define void @v_shuffle_v3p0_v4p0__7_5_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__7_5_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -681,6 +690,7 @@ define void @v_shuffle_v3p0_v4p0__7_6_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -727,6 +737,7 @@ define void @v_shuffle_v3p0_v4p0__7_7_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4719,6 +4730,7 @@ define void @v_shuffle_v3p0_v4p0__1_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4758,6 +4770,7 @@ define void @v_shuffle_v3p0_v4p0__2_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4797,10 +4810,11 @@ define void @v_shuffle_v3p0_v4p0__3_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx2 v8, v[0:1], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -8016,8 +8030,7 @@ define void @s_shuffle_v3p0_v4p0__1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8057,8 +8070,7 @@ define void @s_shuffle_v3p0_v4p0__2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8102,8 +8114,7 @@ define void @s_shuffle_v3p0_v4p0__3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8161,8 +8172,7 @@ define void @s_shuffle_v3p0_v4p0__5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8203,8 +8213,7 @@ define void @s_shuffle_v3p0_v4p0__6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8249,8 +8258,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8309,10 +8317,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8366,8 +8372,7 @@ define void @s_shuffle_v3p0_v4p0__7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8425,10 +8430,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8482,10 +8485,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8534,10 +8535,8 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,18 +8549,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_u() {
}
define void @s_shuffle_v3p0_v4p0__7_5_u() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
@@ -8606,10 +8630,8 @@ define void @s_shuffle_v3p0_v4p0__7_6_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8654,10 +8676,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8716,13 +8736,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8781,13 +8799,11 @@ define void @s_shuffle_v3p0_v4p0__7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8841,10 +8857,8 @@ define void @s_shuffle_v3p0_v4p0__7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8902,12 +8916,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -8956,12 +8967,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9010,12 +9018,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9028,20 +9033,48 @@ define void @s_shuffle_v3p0_v4p0__7_7_5() {
}
define void @s_shuffle_v3p0_v4p0__7_7_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_7_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
@@ -9090,12 +9123,9 @@ define void @s_shuffle_v3p0_v4p0__7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9144,10 +9174,8 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9159,20 +9187,48 @@ define void @s_shuffle_v3p0_v4p0__u_0_0() {
}
define void @s_shuffle_v3p0_v4p0__0_0_0() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
@@ -9220,12 +9276,9 @@ define void @s_shuffle_v3p0_v4p0__1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9273,12 +9326,9 @@ define void @s_shuffle_v3p0_v4p0__2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9330,12 +9380,9 @@ define void @s_shuffle_v3p0_v4p0__3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9383,10 +9430,8 @@ define void @s_shuffle_v3p0_v4p0__4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9444,15 +9489,12 @@ define void @s_shuffle_v3p0_v4p0__5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9511,13 +9553,11 @@ define void @s_shuffle_v3p0_v4p0__6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9576,15 +9616,12 @@ define void @s_shuffle_v3p0_v4p0__7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9639,13 +9676,11 @@ define void @s_shuffle_v3p0_v4p0__7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9704,15 +9739,12 @@ define void @s_shuffle_v3p0_v4p0__7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9770,12 +9802,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9833,12 +9862,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9897,15 +9923,12 @@ define void @s_shuffle_v3p0_v4p0__7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -9963,10 +9986,8 @@ define void @s_shuffle_v3p0_v4p0__7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10028,12 +10049,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -10046,137 +10064,12 @@ define void @s_shuffle_v3p0_v4p0__7_6_0() {
}
define void @s_shuffle_v3p0_v4p0__u_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__2_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__3_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_1_1() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10184,17 +10077,12 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10202,42 +10090,30 @@ define void @s_shuffle_v3p0_v4p0__5_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+define void @s_shuffle_v3p0_v4p0__0_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10245,17 +10121,12 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10263,42 +10134,32 @@ define void @s_shuffle_v3p0_v4p0__6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+define void @s_shuffle_v3p0_v4p0__1_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
@@ -10306,17 +10167,14 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
@@ -10324,263 +10182,204 @@ define void @s_shuffle_v3p0_v4p0__7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+define void @s_shuffle_v3p0_v4p0__2_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+define void @s_shuffle_v3p0_v4p0__3_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+define void @s_shuffle_v3p0_v4p0__4_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+define void @s_shuffle_v3p0_v4p0__5_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10589,128 +10388,116 @@ define void @s_shuffle_v3p0_v4p0__7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+define void @s_shuffle_v3p0_v4p0__6_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+define void @s_shuffle_v3p0_v4p0__7_1_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10719,23 +10506,21 @@ define void @s_shuffle_v3p0_v4p0__7_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_1() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+define void @s_shuffle_v3p0_v4p0__7_u_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -10746,8 +10531,6 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -10755,7 +10538,7 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -10766,8 +10549,6 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -10775,190 +10556,134 @@ define void @s_shuffle_v3p0_v4p0__7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__1_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__2_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__3_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_2_2() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v3p0_v4p0__7_0_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+define void @s_shuffle_v3p0_v4p0__7_2_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -10967,55 +10692,58 @@ define void @s_shuffle_v3p0_v4p0__5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+define void @s_shuffle_v3p0_v4p0__7_3_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11024,238 +10752,1418 @@ define void @s_shuffle_v3p0_v4p0__6_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+define void @s_shuffle_v3p0_v4p0__7_4_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_5_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_6_1() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__1_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__3_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__4_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__6_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_2_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_u_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_0_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_1_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_3_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_4_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_5_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__7_6_2() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__1_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__2_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+define void @s_shuffle_v3p0_v4p0__3_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+define void @s_shuffle_v3p0_v4p0__4_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+define void @s_shuffle_v3p0_v4p0__5_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11264,21 +12172,22 @@ define void @s_shuffle_v3p0_v4p0__7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+define void @s_shuffle_v3p0_v4p0__6_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11287,16 +12196,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11305,16 +12214,16 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11323,23 +12232,22 @@ define void @s_shuffle_v3p0_v4p0__7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+define void @s_shuffle_v3p0_v4p0__7_3_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -11350,14 +12258,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -11368,14 +12278,16 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11384,59 +12296,58 @@ define void @s_shuffle_v3p0_v4p0__7_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+define void @s_shuffle_v3p0_v4p0__7_u_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11445,59 +12356,61 @@ define void @s_shuffle_v3p0_v4p0__7_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_2() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+define void @s_shuffle_v3p0_v4p0__7_0_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_2:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -11506,30 +12419,32 @@ define void @s_shuffle_v3p0_v4p0__7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__u_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+define void @s_shuffle_v3p0_v4p0__7_1_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11537,14 +12452,17 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11552,58 +12470,42 @@ define void @s_shuffle_v3p0_v4p0__u_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__0_3_3() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__0_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+define void @s_shuffle_v3p0_v4p0__7_2_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11611,16 +12513,19 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11628,39 +12533,43 @@ define void @s_shuffle_v3p0_v4p0__1_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+define void @s_shuffle_v3p0_v4p0__7_4_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s12
-; GFX900-NEXT: s_mov_b32 s9, s13
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11668,16 +12577,19 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s12
-; GFX90A-NEXT: s_mov_b32 s9, s13
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11685,94 +12597,102 @@ define void @s_shuffle_v3p0_v4p0__2_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+define void @s_shuffle_v3p0_v4p0__7_5_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__4_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+define void @s_shuffle_v3p0_v4p0__7_6_3() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -11780,14 +12700,19 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -11795,649 +12720,531 @@ define void @s_shuffle_v3p0_v4p0__4_3_3() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 3, i32 3>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+define void @s_shuffle_v3p0_v4p0__u_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__0_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+define void @s_shuffle_v3p0_v4p0__1_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_3_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+define void @s_shuffle_v3p0_v4p0__2_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_3:
-; GFX942: ; %bb.0:
-; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+define void @s_shuffle_v3p0_v4p0__3_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+define void @s_shuffle_v3p0_v4p0__4_4_4() {
+; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:13]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__5_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+define void @s_shuffle_v3p0_v4p0__6_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+define void @s_shuffle_v3p0_v4p0__7_4_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+define void @s_shuffle_v3p0_v4p0__7_u_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+define void @s_shuffle_v3p0_v4p0__7_0_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_3() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+define void @s_shuffle_v3p0_v4p0__7_1_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_3:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -12446,831 +13253,783 @@ define void @s_shuffle_v3p0_v4p0__7_6_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+define void @s_shuffle_v3p0_v4p0__7_2_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 0, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+define void @s_shuffle_v3p0_v4p0__7_3_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 1, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+define void @s_shuffle_v3p0_v4p0__7_5_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 2, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+define void @s_shuffle_v3p0_v4p0__7_6_4() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 3, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_4_4() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <3 x i32> <i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__5_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+define void @s_shuffle_v3p0_v4p0__u_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__6_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+define void @s_shuffle_v3p0_v4p0__0_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+define void @s_shuffle_v3p0_v4p0__1_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+define void @s_shuffle_v3p0_v4p0__2_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+define void @s_shuffle_v3p0_v4p0__3_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+define void @s_shuffle_v3p0_v4p0__4_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+define void @s_shuffle_v3p0_v4p0__5_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+define void @s_shuffle_v3p0_v4p0__6_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_5_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+define void @s_shuffle_v3p0_v4p0__7_5_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_4() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+define void @s_shuffle_v3p0_v4p0__7_u_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_4:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+define void @s_shuffle_v3p0_v4p0__7_0_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13278,17 +14037,19 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13296,68 +14057,68 @@ define void @s_shuffle_v3p0_v4p0__0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+define void @s_shuffle_v3p0_v4p0__7_1_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13366,231 +14127,145 @@ define void @s_shuffle_v3p0_v4p0__1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+define void @s_shuffle_v3p0_v4p0__7_2_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_5_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+define void @s_shuffle_v3p0_v4p0__7_3_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_5_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_5_5() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_u_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+define void @s_shuffle_v3p0_v4p0__7_4_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13598,6 +14273,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -13605,7 +14282,7 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13613,6 +14290,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -13620,41 +14299,37 @@ define void @s_shuffle_v3p0_v4p0__7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_0_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+define void @s_shuffle_v3p0_v4p0__7_6_5() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -13662,19 +14337,16 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -13682,35 +14354,73 @@ define void @s_shuffle_v3p0_v4p0__7_0_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_0_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v3p0_v4p0__u_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_1_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+define void @s_shuffle_v3p0_v4p0__0_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -13719,16 +14429,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -13737,16 +14447,16 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_1_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -13755,360 +14465,300 @@ define void @s_shuffle_v3p0_v4p0__7_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_2_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+define void @s_shuffle_v3p0_v4p0__1_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_2_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_3_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+define void @s_shuffle_v3p0_v4p0__2_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_3_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_4_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+define void @s_shuffle_v3p0_v4p0__3_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_4_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__7_6_5() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+define void @s_shuffle_v3p0_v4p0__4_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_5:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__u_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__0_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+define void @s_shuffle_v3p0_v4p0__5_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__0_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__1_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+define void @s_shuffle_v3p0_v4p0__6_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14116,17 +14766,14 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14134,42 +14781,34 @@ define void @s_shuffle_v3p0_v4p0__1_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__1_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__2_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+define void @s_shuffle_v3p0_v4p0__7_6_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX900: ; %bb.0:
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -14177,17 +14816,14 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -14195,190 +14831,63 @@ define void @s_shuffle_v3p0_v4p0__2_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__2_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v3p0_v4p0__3_6_6() {
-; GFX900-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+define void @s_shuffle_v3p0_v4p0__7_u_6() {
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:13]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:13]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v3p0_v4p0__3_6_6:
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__4_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__5_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__6_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_6_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:13]}"(<3 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v3p0_v4p0__7_u_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_u_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
@@ -14432,10 +14941,8 @@ define void @s_shuffle_v3p0_v4p0__7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14493,10 +15000,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14554,10 +15059,8 @@ define void @s_shuffle_v3p0_v4p0__7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14615,10 +15118,8 @@ define void @s_shuffle_v3p0_v4p0__7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14671,12 +15172,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14689,18 +15187,43 @@ define void @s_shuffle_v3p0_v4p0__7_4_6() {
}
define void @s_shuffle_v3p0_v4p0__7_5_6() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
@@ -14745,10 +15268,8 @@ define void @s_shuffle_v3p0_v4p0__u_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14806,10 +15327,8 @@ define void @s_shuffle_v3p0_v4p0__0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14871,12 +15390,9 @@ define void @s_shuffle_v3p0_v4p0__1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -14934,12 +15450,9 @@ define void @s_shuffle_v3p0_v4p0__2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,12 +15514,9 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15019,20 +15529,48 @@ define void @s_shuffle_v3p0_v4p0__3_7_7() {
}
define void @s_shuffle_v3p0_v4p0__4_7_7() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__4_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
@@ -15081,12 +15619,9 @@ define void @s_shuffle_v3p0_v4p0__5_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15139,12 +15674,9 @@ define void @s_shuffle_v3p0_v4p0__6_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15193,10 +15725,8 @@ define void @s_shuffle_v3p0_v4p0__7_u_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15258,12 +15788,9 @@ define void @s_shuffle_v3p0_v4p0__7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15321,10 +15848,8 @@ define void @s_shuffle_v3p0_v4p0__7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15386,12 +15911,9 @@ define void @s_shuffle_v3p0_v4p0__7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15449,12 +15971,9 @@ define void @s_shuffle_v3p0_v4p0__7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15507,12 +16026,9 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
@@ -15525,20 +16041,48 @@ define void @s_shuffle_v3p0_v4p0__7_4_7() {
}
define void @s_shuffle_v3p0_v4p0__7_5_7() {
-; GFX9-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v3p0_v4p0__7_5_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
@@ -15587,12 +16131,9 @@ define void @s_shuffle_v3p0_v4p0__7_6_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:13]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
index ac7d9557ce765..f2538521727d1 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v2i64.ll
@@ -45,6 +45,7 @@ define void @v_shuffle_v4i64_v2i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -84,10 +85,12 @@ define void @v_shuffle_v4i64_v2i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -140,10 +143,12 @@ define void @v_shuffle_v4i64_v2i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -203,7 +208,8 @@ define void @v_shuffle_v4i64_v2i64__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -254,14 +260,15 @@ define void @v_shuffle_v4i64_v2i64__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -308,7 +315,7 @@ define void @v_shuffle_v4i64_v2i64__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -351,10 +358,12 @@ define void @v_shuffle_v4i64_v2i64__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v2i64__3_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -2362,6 +2371,7 @@ define void @v_shuffle_v4i64_v2i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2401,10 +2411,12 @@ define void @v_shuffle_v4i64_v2i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v2i64__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -3734,8 +3746,7 @@ define void @s_shuffle_v4i64_v2i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3793,8 +3804,7 @@ define void @s_shuffle_v4i64_v2i64__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,10 +3862,8 @@ define void @s_shuffle_v4i64_v2i64__3_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3917,7 @@ define void @s_shuffle_v4i64_v2i64__3_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3959,10 +3966,8 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3975,18 +3980,43 @@ define void @s_shuffle_v4i64_v2i64__3_2_u_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
@@ -3995,21 +4025,52 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
@@ -4063,10 +4124,8 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4079,20 +4138,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_2_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
@@ -4101,20 +4188,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_u() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -4172,12 +4287,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4190,62 +4302,96 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_0() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2:
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4259,14 +4405,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4279,22 +4421,53 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_2() {
}
define void @s_shuffle_v4i64_v2i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -4303,20 +4476,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4324,22 +4525,53 @@ define void @s_shuffle_v4i64_v2i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v2i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -4347,61 +4579,14 @@ define void @s_shuffle_v4i64_v2i64__0_0_0_0() {
}
define void @s_shuffle_v4i64_v2i64__1_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__2_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -4411,54 +4596,526 @@ define void @s_shuffle_v4i64_v2i64__3_0_0_0() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__2_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_1_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_2_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_3_0_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_3_1_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4467,16 +5124,18 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4485,16 +5144,18 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4503,182 +5164,223 @@ define void @s_shuffle_v4i64_v2i64__3_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+define void @s_shuffle_v4i64_v2i64__u_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+define void @s_shuffle_v4i64_v2i64__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_0_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_3_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4i64_v2i64__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+define void @s_shuffle_v4i64_v2i64__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4687,16 +5389,18 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4705,16 +5409,18 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4723,90 +5429,81 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_3_1_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
+define void @s_shuffle_v4i64_v2i64__3_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
+define void @s_shuffle_v4i64_v2i64__3_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4815,18 +5512,18 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4835,18 +5532,18 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_0:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4855,188 +5552,96 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
+define void @s_shuffle_v4i64_v2i64__3_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
+define void @s_shuffle_v4i64_v2i64__3_3_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -5044,17 +5649,17 @@ define void @s_shuffle_v4i64_v2i64__3_u_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -5062,228 +5667,129 @@ define void @s_shuffle_v4i64_v2i64__3_u_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
+define void @s_shuffle_v4i64_v2i64__3_3_u_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
+define void @s_shuffle_v4i64_v2i64__3_3_0_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_u_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_0_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
@@ -5341,12 +5847,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5445,8 +5948,7 @@ define void @s_shuffle_v4i64_v2i64__1_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5472,22 +5974,53 @@ define void @s_shuffle_v4i64_v2i64__2_2_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
@@ -5496,41 +6029,189 @@ define void @s_shuffle_v4i64_v2i64__3_2_2_2() {
}
define void @s_shuffle_v4i64_v2i64__3_u_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x i64> asm "; def $0", "=s"()
+ %vec1 = call <2 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v2i64__3_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+define void @s_shuffle_v4i64_v2i64__3_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -5538,19 +6219,16 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -5558,74 +6236,19 @@ define void @s_shuffle_v4i64_v2i64__3_0_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_0_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_1_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x i64> asm "; def $0", "=s"()
- %vec1 = call <2 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v2i64__3_3_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
@@ -5674,12 +6297,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5741,12 +6361,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5812,14 +6429,10 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5832,20 +6445,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_2() {
}
define void @s_shuffle_v4i64_v2i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__u_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
@@ -5854,23 +6495,57 @@ define void @s_shuffle_v4i64_v2i64__u_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__0_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
@@ -5928,12 +6603,9 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5946,20 +6618,48 @@ define void @s_shuffle_v4i64_v2i64__1_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__2_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
@@ -5968,20 +6668,48 @@ define void @s_shuffle_v4i64_v2i64__2_3_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_u_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
@@ -6039,12 +6767,9 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6057,23 +6782,57 @@ define void @s_shuffle_v4i64_v2i64__3_0_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_1_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
@@ -6082,22 +6841,53 @@ define void @s_shuffle_v4i64_v2i64__3_1_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_2_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
@@ -6106,20 +6896,48 @@ define void @s_shuffle_v4i64_v2i64__3_2_3_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_u_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_u_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
@@ -6128,23 +6946,57 @@ define void @s_shuffle_v4i64_v2i64__3_3_u_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_0_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
@@ -6202,12 +7054,9 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6220,20 +7069,48 @@ define void @s_shuffle_v4i64_v2i64__3_3_1_3() {
}
define void @s_shuffle_v4i64_v2i64__3_3_2_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v2i64__3_3_2_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x i64> asm "; def $0", "=s"()
%vec1 = call <2 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
index 8dd4a40d00680..d4f80eab1e28e 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,10 +127,12 @@ define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -182,6 +185,7 @@ define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -222,10 +226,12 @@ define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -285,11 +291,11 @@ define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -336,14 +342,15 @@ define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -392,14 +399,15 @@ define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v10
; GFX942-NEXT: v_mov_b32_e32 v3, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
@@ -446,10 +454,12 @@ define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -494,10 +504,12 @@ define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -540,10 +552,12 @@ define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -4699,6 +4713,7 @@ define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4738,10 +4753,12 @@ define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -7818,8 +7835,7 @@ define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7859,8 +7875,7 @@ define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7918,8 +7933,7 @@ define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7960,8 +7974,7 @@ define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8016,11 +8029,11 @@ define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8070,8 +8083,7 @@ define void @s_shuffle_v4i64_v3i64__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8125,10 +8137,8 @@ define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8173,10 +8183,8 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8189,18 +8197,43 @@ define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
}
define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
@@ -8245,10 +8278,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8310,12 +8341,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8377,12 +8405,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8440,10 +8465,8 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8492,12 +8515,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,12 +8570,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8568,20 +8585,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
@@ -8639,12 +8684,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8706,12 +8748,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8773,12 +8812,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8835,14 +8871,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8895,14 +8927,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8915,22 +8943,53 @@ define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
}
define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
@@ -8979,12 +9038,9 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8996,22 +9052,53 @@ define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v3i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -9063,14 +9150,10 @@ define void @s_shuffle_v4i64_v3i64__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9122,14 +9205,10 @@ define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9181,12 +9260,9 @@ define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9248,17 +9324,13 @@ define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9321,15 +9393,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9388,13 +9457,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9457,15 +9524,12 @@ define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9527,14 +9591,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9597,15 +9657,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9667,12 +9724,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9738,14 +9792,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9807,12 +9857,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9878,14 +9925,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9951,14 +9994,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10020,12 +10059,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10091,14 +10127,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10111,87 +10143,205 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
}
define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -10199,20 +10349,48 @@ define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
}
define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -10269,12 +10447,9 @@ define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10332,12 +10507,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10396,13 +10568,11 @@ define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10465,15 +10635,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10531,14 +10698,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10601,15 +10764,12 @@ define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10671,12 +10831,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10742,14 +10899,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10811,12 +10964,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10882,14 +11032,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10951,14 +11097,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11020,12 +11162,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11091,14 +11230,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11111,20 +11246,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
}
define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11132,20 +11295,48 @@ define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11153,22 +11344,53 @@ define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11176,22 +11398,53 @@ define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11199,20 +11452,48 @@ define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
}
define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -11269,12 +11550,9 @@ define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11332,12 +11610,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11391,10 +11666,8 @@ define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11456,14 +11729,10 @@ define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11517,10 +11786,8 @@ define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11578,12 +11845,9 @@ define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11645,12 +11909,9 @@ define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11712,12 +11973,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11779,12 +12037,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11846,14 +12101,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11919,14 +12170,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11992,12 +12239,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12063,14 +12307,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12169,8 +12409,7 @@ define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12210,8 +12449,7 @@ define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12281,14 +12519,10 @@ define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12341,14 +12575,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12397,12 +12627,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12465,15 +12692,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12531,12 +12755,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12598,14 +12819,10 @@ define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12658,14 +12875,10 @@ define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12722,14 +12935,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12782,12 +12991,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12853,14 +13059,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12926,14 +13128,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12995,12 +13193,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13057,14 +13252,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13077,20 +13268,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
}
define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
@@ -13144,12 +13363,9 @@ define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13211,12 +13427,9 @@ define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13278,12 +13491,9 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13296,20 +13506,48 @@ define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
@@ -13318,22 +13556,53 @@ define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
@@ -13342,22 +13611,53 @@ define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
}
define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
@@ -13402,12 +13702,9 @@ define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13466,15 +13763,12 @@ define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13532,12 +13826,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13599,12 +13890,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13657,14 +13945,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13717,14 +14001,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13773,12 +14053,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13840,12 +14117,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13907,12 +14181,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13974,12 +14245,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14028,14 +14296,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14048,20 +14312,48 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
}
define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
@@ -14119,12 +14411,9 @@ define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14186,12 +14475,9 @@ define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14253,12 +14539,9 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14271,20 +14554,48 @@ define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
@@ -14293,22 +14604,53 @@ define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
@@ -14317,20 +14659,48 @@ define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
}
define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
@@ -14388,12 +14758,9 @@ define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14455,12 +14822,9 @@ define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14522,12 +14886,9 @@ define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14584,14 +14945,10 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14604,20 +14961,48 @@ define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
}
define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x i64> asm "; def $0", "=s"()
%vec1 = call <3 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
@@ -14666,12 +15051,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14737,14 +15119,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14810,14 +15188,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14879,12 +15253,9 @@ define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14937,14 +15308,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,14 +15368,10 @@ define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
index ea9ef2f1ac94a..aaaadb95351c0 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v4i64.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v4i64_v4i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,6 +127,7 @@ define void @v_shuffle_v4i64_v4i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -165,10 +167,12 @@ define void @v_shuffle_v4i64_v4i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -221,6 +225,7 @@ define void @v_shuffle_v4i64_v4i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -261,6 +266,7 @@ define void @v_shuffle_v4i64_v4i64__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -301,10 +307,12 @@ define void @v_shuffle_v4i64_v4i64__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -364,11 +372,11 @@ define void @v_shuffle_v4i64_v4i64__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -415,14 +423,15 @@ define void @v_shuffle_v4i64_v4i64__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
@@ -471,14 +480,15 @@ define void @v_shuffle_v4i64_v4i64__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v12
; GFX942-NEXT: v_mov_b32_e32 v3, v13
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
@@ -534,7 +544,7 @@ define void @v_shuffle_v4i64_v4i64__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v14
; GFX942-NEXT: v_mov_b32_e32 v5, v15
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
@@ -581,14 +591,15 @@ define void @v_shuffle_v4i64_v4i64__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -629,10 +640,12 @@ define void @v_shuffle_v4i64_v4i64__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -675,10 +688,12 @@ define void @v_shuffle_v4i64_v4i64__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__7_6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
@@ -727,6 +742,7 @@ define void @v_shuffle_v4i64_v4i64__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7703,6 +7719,7 @@ define void @v_shuffle_v4i64_v4i64__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7742,6 +7759,7 @@ define void @v_shuffle_v4i64_v4i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7781,10 +7799,12 @@ define void @v_shuffle_v4i64_v4i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4i64_v4i64__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -13289,8 +13309,7 @@ define void @s_shuffle_v4i64_v4i64__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13330,8 +13349,7 @@ define void @s_shuffle_v4i64_v4i64__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13375,8 +13393,7 @@ define void @s_shuffle_v4i64_v4i64__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13434,8 +13451,7 @@ define void @s_shuffle_v4i64_v4i64__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13476,8 +13492,7 @@ define void @s_shuffle_v4i64_v4i64__6_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13522,8 +13537,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13582,10 +13596,8 @@ define void @s_shuffle_v4i64_v4i64__7_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13639,8 +13651,7 @@ define void @s_shuffle_v4i64_v4i64__7_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13698,10 +13709,8 @@ define void @s_shuffle_v4i64_v4i64__7_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13755,10 +13764,8 @@ define void @s_shuffle_v4i64_v4i64__7_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13807,10 +13814,8 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13823,18 +13828,43 @@ define void @s_shuffle_v4i64_v4i64__7_4_u_u() {
}
define void @s_shuffle_v4i64_v4i64__7_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
@@ -13879,10 +13909,8 @@ define void @s_shuffle_v4i64_v4i64__7_6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13927,10 +13955,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13989,13 +14015,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14054,13 +14078,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14114,10 +14136,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14175,12 +14195,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14229,12 +14246,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14283,12 +14297,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14301,20 +14312,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_u() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
@@ -14363,12 +14402,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14434,14 +14470,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14507,14 +14539,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14580,14 +14608,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14649,12 +14673,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14711,14 +14732,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14771,14 +14788,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14835,14 +14848,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14855,22 +14864,53 @@ define void @s_shuffle_v4i64_v4i64__7_7_7_6() {
}
define void @s_shuffle_v4i64_v4i64__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -14919,12 +14959,9 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14936,22 +14973,53 @@ define void @s_shuffle_v4i64_v4i64__u_0_0_0() {
}
define void @s_shuffle_v4i64_v4i64__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
@@ -15003,14 +15071,10 @@ define void @s_shuffle_v4i64_v4i64__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15062,14 +15126,10 @@ define void @s_shuffle_v4i64_v4i64__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15125,14 +15185,10 @@ define void @s_shuffle_v4i64_v4i64__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15184,12 +15240,9 @@ define void @s_shuffle_v4i64_v4i64__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15251,17 +15304,13 @@ define void @s_shuffle_v4i64_v4i64__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15324,15 +15373,12 @@ define void @s_shuffle_v4i64_v4i64__6_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15395,17 +15441,13 @@ define void @s_shuffle_v4i64_v4i64__7_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15464,15 +15506,12 @@ define void @s_shuffle_v4i64_v4i64__7_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15535,17 +15574,13 @@ define void @s_shuffle_v4i64_v4i64__7_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15611,14 +15646,10 @@ define void @s_shuffle_v4i64_v4i64__7_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15680,14 +15711,10 @@ define void @s_shuffle_v4i64_v4i64__7_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15750,17 +15777,13 @@ define void @s_shuffle_v4i64_v4i64__7_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15822,12 +15845,9 @@ define void @s_shuffle_v4i64_v4i64__7_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15893,14 +15913,10 @@ define void @s_shuffle_v4i64_v4i64__7_6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15963,15 +15979,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16030,13 +16043,11 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16099,15 +16110,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16169,14 +16177,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16238,14 +16242,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16304,15 +16304,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16375,15 +16372,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16445,12 +16439,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16463,149 +16454,12 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_0() {
}
define void @s_shuffle_v4i64_v4i64__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16615,17 +16469,12 @@ define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16635,44 +16484,31 @@ define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
+define void @s_shuffle_v4i64_v4i64__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16682,17 +16518,12 @@ define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16702,44 +16533,33 @@ define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
+define void @s_shuffle_v4i64_v4i64__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16749,17 +16569,14 @@ define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16769,283 +16586,224 @@ define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
+define void @s_shuffle_v4i64_v4i64__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
+define void @s_shuffle_v4i64_v4i64__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
+define void @s_shuffle_v4i64_v4i64__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
+define void @s_shuffle_v4i64_v4i64__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17054,140 +16812,126 @@ define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
+define void @s_shuffle_v4i64_v4i64__6_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
+define void @s_shuffle_v4i64_v4i64__7_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17196,25 +16940,22 @@ define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
+define void @s_shuffle_v4i64_v4i64__7_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17225,8 +16966,6 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -17236,7 +16975,7 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17247,8 +16986,6 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -17258,36 +16995,31 @@ define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
+define void @s_shuffle_v4i64_v4i64__7_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17298,8 +17030,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -17309,7 +17041,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17320,8 +17052,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -17331,113 +17063,109 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
+define void @s_shuffle_v4i64_v4i64__7_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
+define void @s_shuffle_v4i64_v4i64__7_3_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17445,21 +17173,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17467,135 +17193,141 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
+define void @s_shuffle_v4i64_v4i64__7_4_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
+define void @s_shuffle_v4i64_v4i64__7_5_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17604,39 +17336,36 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
+define void @s_shuffle_v4i64_v4i64__7_6_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17644,19 +17373,21 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17664,48 +17395,46 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+define void @s_shuffle_v4i64_v4i64__7_7_1_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17713,21 +17442,21 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17735,34 +17464,31 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
+define void @s_shuffle_v4i64_v4i64__7_7_u_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17782,7 +17508,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17802,269 +17528,134 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_0_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_2_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18073,65 +17664,59 @@ define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_3_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18140,195 +17725,195 @@ define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_4_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_5_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+define void @s_shuffle_v4i64_v4i64__7_7_6_1() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18337,35 +17922,29 @@ define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+define void @s_shuffle_v4i64_v4i64__u_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,19 +17952,14 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,46 +17967,33 @@ define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+define void @s_shuffle_v4i64_v4i64__0_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18440,19 +18001,14 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18460,113 +18016,89 @@ define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+define void @s_shuffle_v4i64_v4i64__1_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+define void @s_shuffle_v4i64_v4i64__2_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18574,19 +18106,16 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18594,44 +18123,36 @@ define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+define void @s_shuffle_v4i64_v4i64__3_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18639,17 +18160,16 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18657,44 +18177,34 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+define void @s_shuffle_v4i64_v4i64__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18702,17 +18212,14 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18720,70 +18227,66 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+define void @s_shuffle_v4i64_v4i64__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,67 +18295,58 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+define void @s_shuffle_v4i64_v4i64__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18861,67 +18355,62 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+define void @s_shuffle_v4i64_v4i64__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18930,94 +18419,81 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+define void @s_shuffle_v4i64_v4i64__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+define void @s_shuffle_v4i64_v4i64__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19028,8 +18504,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -19037,7 +18515,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19048,8 +18526,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -19057,76 +18537,68 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+define void @s_shuffle_v4i64_v4i64__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19135,157 +18607,21 @@ define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19294,18 +18630,18 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19314,18 +18650,18 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19334,25 +18670,22 @@ define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19361,16 +18694,18 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19379,16 +18714,18 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,65 +18734,62 @@ define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+define void @s_shuffle_v4i64_v4i64__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19464,61 +18798,62 @@ define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+define void @s_shuffle_v4i64_v4i64__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19527,67 +18862,58 @@ define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19596,63 +18922,58 @@ define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19661,63 +18982,58 @@ define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_0_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19726,65 +19042,63 @@ define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_1_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19793,65 +19107,63 @@ define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_3_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s18
; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s18
; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19860,187 +19172,192 @@ define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_4_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+define void @s_shuffle_v4i64_v4i64__7_7_5_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+define void @s_shuffle_v4i64_v4i64__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20049,926 +19366,898 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+define void @s_shuffle_v4i64_v4i64__u_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+define void @s_shuffle_v4i64_v4i64__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+define void @s_shuffle_v4i64_v4i64__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+define void @s_shuffle_v4i64_v4i64__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+define void @s_shuffle_v4i64_v4i64__3_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+define void @s_shuffle_v4i64_v4i64__4_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+define void @s_shuffle_v4i64_v4i64__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+define void @s_shuffle_v4i64_v4i64__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+define void @s_shuffle_v4i64_v4i64__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+define void @s_shuffle_v4i64_v4i64__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+define void @s_shuffle_v4i64_v4i64__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+define void @s_shuffle_v4i64_v4i64__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20979,16 +20268,16 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20999,16 +20288,16 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21017,25 +20306,22 @@ define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21046,18 +20332,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21068,649 +20350,609 @@ define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_0_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_1_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+define void @s_shuffle_v4i64_v4i64__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+define void @s_shuffle_v4i64_v4i64__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+define void @s_shuffle_v4i64_v4i64__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+define void @s_shuffle_v4i64_v4i64__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+define void @s_shuffle_v4i64_v4i64__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+define void @s_shuffle_v4i64_v4i64__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+define void @s_shuffle_v4i64_v4i64__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21718,8 +20960,74 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21727,129 +21035,1656 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+define void @s_shuffle_v4i64_v4i64__6_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_3_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_5_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_6_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_4_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_u_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_0_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_5_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__u_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -21857,17 +22692,19 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -21875,74 +22712,69 @@ define void @s_shuffle_v4i64_v4i64__0_5_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21951,253 +22783,150 @@ define void @s_shuffle_v4i64_v4i64__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22205,6 +22934,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -22214,7 +22945,7 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22222,6 +22953,8 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -22231,43 +22964,38 @@ define void @s_shuffle_v4i64_v4i64__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22275,19 +23003,16 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22295,114 +23020,92 @@ define void @s_shuffle_v4i64_v4i64__7_0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22410,21 +23113,14 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22432,34 +23128,28 @@ define void @s_shuffle_v4i64_v4i64__7_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_0_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22470,14 +23160,16 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22488,174 +23180,176 @@ define void @s_shuffle_v4i64_v4i64__7_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22663,16 +23357,19 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22680,33 +23377,31 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+define void @s_shuffle_v4i64_v4i64__7_7_4_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22714,6 +23409,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22721,7 +23418,7 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22729,6 +23426,8 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22736,201 +23435,175 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_0_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_1_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+define void @s_shuffle_v4i64_v4i64__u_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_1_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+define void @s_shuffle_v4i64_v4i64__0_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_2_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -22939,311 +23612,271 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_3_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_3_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_4_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_5:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__u_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__0_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+define void @s_shuffle_v4i64_v4i64__4_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__0_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+define void @s_shuffle_v4i64_v4i64__5_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23253,17 +23886,14 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23273,44 +23903,35 @@ define void @s_shuffle_v4i64_v4i64__1_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__1_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+define void @s_shuffle_v4i64_v4i64__6_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23320,17 +23941,14 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23340,44 +23958,35 @@ define void @s_shuffle_v4i64_v4i64__2_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__2_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
ret void
}
-define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23387,17 +23996,14 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23407,119 +24013,19 @@ define void @s_shuffle_v4i64_v4i64__3_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_6_6_6:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
@@ -23528,20 +24034,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_u_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
@@ -23599,12 +24133,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23666,12 +24197,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23733,12 +24261,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23800,12 +24325,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23862,14 +24384,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23882,20 +24400,48 @@ define void @s_shuffle_v4i64_v4i64__7_4_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_5_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
@@ -23904,22 +24450,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_6_6() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_6() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
@@ -23968,12 +24545,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24039,14 +24613,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24112,14 +24682,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24181,12 +24747,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24252,14 +24815,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24312,14 +24871,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24376,14 +24931,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24396,20 +24947,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_6() {
}
define void @s_shuffle_v4i64_v4i64__u_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__u_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
@@ -24467,12 +25046,9 @@ define void @s_shuffle_v4i64_v4i64__0_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24534,12 +25110,9 @@ define void @s_shuffle_v4i64_v4i64__1_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24601,12 +25174,9 @@ define void @s_shuffle_v4i64_v4i64__2_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24659,117 +25229,232 @@ define void @s_shuffle_v4i64_v4i64__3_7_7_7() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7:
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__3_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__4_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__5_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__6_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x i64> asm "; def $0", "=s"()
+ %vec1 = call <4 x i64> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4i64_v4i64__7_u_7_7() {
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__4_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__4_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__5_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__5_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__6_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__6_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x i64> asm "; def $0", "=s"()
- %vec1 = call <4 x i64> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4i64_v4i64__7_u_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_u_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
@@ -24827,12 +25512,9 @@ define void @s_shuffle_v4i64_v4i64__7_0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24894,12 +25576,9 @@ define void @s_shuffle_v4i64_v4i64__7_1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24961,12 +25640,9 @@ define void @s_shuffle_v4i64_v4i64__7_2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25028,12 +25704,9 @@ define void @s_shuffle_v4i64_v4i64__7_3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25090,14 +25763,10 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25110,20 +25779,48 @@ define void @s_shuffle_v4i64_v4i64__7_4_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_5_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_5_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
@@ -25132,22 +25829,53 @@ define void @s_shuffle_v4i64_v4i64__7_5_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_6_7_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_6_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
@@ -25156,20 +25884,48 @@ define void @s_shuffle_v4i64_v4i64__7_6_7_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_u_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_u_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
@@ -25227,12 +25983,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25294,12 +26047,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25361,12 +26111,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25428,12 +26175,9 @@ define void @s_shuffle_v4i64_v4i64__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25486,14 +26230,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25550,14 +26290,10 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25570,20 +26306,48 @@ define void @s_shuffle_v4i64_v4i64__7_7_5_7() {
}
define void @s_shuffle_v4i64_v4i64__7_7_6_7() {
-; GFX9-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4i64_v4i64__7_7_6_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x i64> asm "; def $0", "=s"()
%vec1 = call <4 x i64> asm "; def $0", "=s"()
%shuf = shufflevector <4 x i64> %vec0, <4 x i64> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
index b30af835a7882..e34b6e95da51c 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v2p0.ll
@@ -45,6 +45,7 @@ define void @v_shuffle_v4p0_v2p0__0_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -84,10 +85,12 @@ define void @v_shuffle_v4p0_v2p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -140,10 +143,12 @@ define void @v_shuffle_v4p0_v2p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -203,7 +208,8 @@ define void @v_shuffle_v4p0_v2p0__3_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -254,14 +260,15 @@ define void @v_shuffle_v4p0_v2p0__3_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -308,7 +315,7 @@ define void @v_shuffle_v4p0_v2p0__3_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -351,10 +358,12 @@ define void @v_shuffle_v4p0_v2p0__3_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v2p0__3_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -2362,6 +2371,7 @@ define void @v_shuffle_v4p0_v2p0__0_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -2401,10 +2411,12 @@ define void @v_shuffle_v4p0_v2p0__1_2_2_2(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v2p0__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v4, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v2
; GFX942-NEXT: v_mov_b32_e32 v1, v3
; GFX942-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1]
@@ -3734,8 +3746,7 @@ define void @s_shuffle_v4p0_v2p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3793,8 +3804,7 @@ define void @s_shuffle_v4p0_v2p0__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3852,10 +3862,8 @@ define void @s_shuffle_v4p0_v2p0__3_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3909,8 +3917,7 @@ define void @s_shuffle_v4p0_v2p0__3_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3959,10 +3966,8 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -3975,18 +3980,43 @@ define void @s_shuffle_v4p0_v2p0__3_2_u_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 poison>
@@ -3995,21 +4025,52 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 poison>
@@ -4063,10 +4124,8 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4079,20 +4138,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_2_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 poison>
@@ -4101,20 +4188,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_u() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 poison>
@@ -4172,12 +4287,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4190,62 +4302,96 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_0() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2:
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -4259,14 +4405,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -4279,22 +4421,53 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_2() {
}
define void @s_shuffle_v4p0_v2p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -4303,20 +4476,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__u_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4324,22 +4525,53 @@ define void @s_shuffle_v4p0_v2p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v2p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -4347,61 +4579,14 @@ define void @s_shuffle_v4p0_v2p0__0_0_0_0() {
}
define void @s_shuffle_v4p0_v2p0__1_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__2_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_0_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -4411,54 +4596,526 @@ define void @s_shuffle_v4p0_v2p0__3_0_0_0() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__2_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_0_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_1_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_2_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_3_0_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_3_1_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4467,16 +5124,18 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4485,16 +5144,18 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4503,182 +5164,223 @@ define void @s_shuffle_v4p0_v2p0__3_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 0, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_1_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+define void @s_shuffle_v4p0_v2p0__u_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 0, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_2_0_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+define void @s_shuffle_v4p0_v2p0__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_0_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 0, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_3_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+define void @s_shuffle_v4p0_v2p0__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+define void @s_shuffle_v4p0_v2p0__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4687,16 +5389,18 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4705,16 +5409,18 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4723,90 +5429,81 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_3_1_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
+define void @s_shuffle_v4p0_v2p0__3_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 0>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
+define void @s_shuffle_v4p0_v2p0__3_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -4815,18 +5512,18 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -4835,18 +5532,18 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_0:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -4855,188 +5552,96 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 0>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
+define void @s_shuffle_v4p0_v2p0__3_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s6
; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s6
; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
+define void @s_shuffle_v4p0_v2p0__3_3_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -5044,17 +5649,17 @@ define void @s_shuffle_v4p0_v2p0__3_u_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -5062,228 +5667,129 @@ define void @s_shuffle_v4p0_v2p0__3_u_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 1, i32 1>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
+define void @s_shuffle_v4p0_v2p0__3_3_u_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 1, i32 1>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
+define void @s_shuffle_v4p0_v2p0__3_3_0_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ; def s[8:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ; def s[8:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ; def s[8:11]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_u_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_0_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 1>
@@ -5341,12 +5847,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5445,8 +5948,7 @@ define void @s_shuffle_v4p0_v2p0__1_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5472,22 +5974,53 @@ define void @s_shuffle_v4p0_v2p0__2_2_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
@@ -5496,41 +6029,189 @@ define void @s_shuffle_v4p0_v2p0__3_2_2_2() {
}
define void @s_shuffle_v4p0_v2p0__3_u_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:7]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:7]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:3]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <2 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <2 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v2p0__3_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+define void @s_shuffle_v4p0_v2p0__3_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:7]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -5538,19 +6219,16 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:7]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -5558,74 +6236,19 @@ define void @s_shuffle_v4p0_v2p0__3_0_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_0_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[12:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:3]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 0, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_1_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <2 x ptr> asm "; def $0", "=s"()
- %vec1 = call <2 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v2p0__3_3_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 2>
@@ -5674,12 +6297,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5741,12 +6361,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5812,14 +6429,10 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5832,20 +6445,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_2() {
}
define void @s_shuffle_v4p0_v2p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__u_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
@@ -5854,23 +6495,57 @@ define void @s_shuffle_v4p0_v2p0__u_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__0_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
@@ -5928,12 +6603,9 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -5946,20 +6618,48 @@ define void @s_shuffle_v4p0_v2p0__1_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__2_3_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
@@ -5968,20 +6668,48 @@ define void @s_shuffle_v4p0_v2p0__2_3_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_u_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_u_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 poison, i32 3, i32 3>
@@ -6039,12 +6767,9 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6057,23 +6782,57 @@ define void @s_shuffle_v4p0_v2p0__3_0_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_1_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_1_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 1, i32 3, i32 3>
@@ -6082,22 +6841,53 @@ define void @s_shuffle_v4p0_v2p0__3_1_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_2_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_2_3_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
@@ -6106,20 +6896,48 @@ define void @s_shuffle_v4p0_v2p0__3_2_3_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_u_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_u_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 poison, i32 3>
@@ -6128,23 +6946,57 @@ define void @s_shuffle_v4p0_v2p0__3_3_u_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_0_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:11]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_0_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 0, i32 3>
@@ -6202,12 +7054,9 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:3]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -6220,20 +7069,48 @@ define void @s_shuffle_v4p0_v2p0__3_3_1_3() {
}
define void @s_shuffle_v4p0_v2p0__3_3_2_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[12:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v2p0__3_3_2_3:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <2 x ptr> asm "; def $0", "=s"()
%vec1 = call <2 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <2 x ptr> %vec0, <2 x ptr> %vec1, <4 x i32> <i32 3, i32 3, i32 2, i32 3>
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
index e6ac554735eee..93b0a34460a49 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v4p0_v3p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,10 +127,12 @@ define void @v_shuffle_v4p0_v3p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -182,6 +185,7 @@ define void @v_shuffle_v4p0_v3p0__4_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -222,10 +226,12 @@ define void @v_shuffle_v4p0_v3p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -285,11 +291,11 @@ define void @v_shuffle_v4p0_v3p0__5_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -336,14 +342,15 @@ define void @v_shuffle_v4p0_v3p0__5_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v10, 0
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v8
; GFX942-NEXT: v_mov_b32_e32 v1, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1]
@@ -392,14 +399,15 @@ define void @v_shuffle_v4p0_v3p0__5_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v10
; GFX942-NEXT: v_mov_b32_e32 v3, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[2:5], s[0:1]
@@ -446,10 +454,12 @@ define void @v_shuffle_v4p0_v3p0__5_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_3_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: v_mov_b32_e32 v4, v0
@@ -494,10 +504,12 @@ define void @v_shuffle_v4p0_v3p0__5_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -540,10 +552,12 @@ define void @v_shuffle_v4p0_v3p0__5_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__5_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v4
; GFX942-NEXT: v_mov_b32_e32 v3, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
@@ -4699,6 +4713,7 @@ define void @v_shuffle_v4p0_v3p0__1_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v6, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -4738,10 +4753,12 @@ define void @v_shuffle_v4p0_v3p0__2_3_3_3(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v3p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v6, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v6, 0
+; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v4
; GFX942-NEXT: v_mov_b32_e32 v1, v5
; GFX942-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
@@ -7818,8 +7835,7 @@ define void @s_shuffle_v4p0_v3p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7859,8 +7875,7 @@ define void @s_shuffle_v4p0_v3p0__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7918,8 +7933,7 @@ define void @s_shuffle_v4p0_v3p0__4_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -7960,8 +7974,7 @@ define void @s_shuffle_v4p0_v3p0__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8016,11 +8029,11 @@ define void @s_shuffle_v4p0_v3p0__5_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8070,8 +8083,7 @@ define void @s_shuffle_v4p0_v3p0__5_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8125,10 +8137,8 @@ define void @s_shuffle_v4p0_v3p0__5_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8173,10 +8183,8 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8189,18 +8197,43 @@ define void @s_shuffle_v4p0_v3p0__5_3_u_u() {
}
define void @s_shuffle_v4p0_v3p0__5_4_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
@@ -8245,10 +8278,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8310,12 +8341,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8377,12 +8405,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8440,10 +8465,8 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8492,12 +8515,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8550,12 +8570,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8568,20 +8585,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_u() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
@@ -8639,12 +8684,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8706,12 +8748,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8773,12 +8812,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8835,14 +8871,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8895,14 +8927,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8915,22 +8943,53 @@ define void @s_shuffle_v4p0_v3p0__5_5_5_4() {
}
define void @s_shuffle_v4p0_v3p0__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
@@ -8979,12 +9038,9 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -8996,22 +9052,53 @@ define void @s_shuffle_v4p0_v3p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v3p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -9063,14 +9150,10 @@ define void @s_shuffle_v4p0_v3p0__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9122,14 +9205,10 @@ define void @s_shuffle_v4p0_v3p0__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9181,12 +9260,9 @@ define void @s_shuffle_v4p0_v3p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9248,17 +9324,13 @@ define void @s_shuffle_v4p0_v3p0__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9321,15 +9393,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9388,13 +9457,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9457,15 +9524,12 @@ define void @s_shuffle_v4p0_v3p0__5_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9527,14 +9591,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9597,15 +9657,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9667,12 +9724,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9738,14 +9792,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9807,12 +9857,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9878,14 +9925,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -9951,14 +9994,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10020,12 +10059,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10091,14 +10127,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10111,87 +10143,205 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_0() {
}
define void @s_shuffle_v4p0_v3p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <3 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v3p0__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <3 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -10199,20 +10349,48 @@ define void @s_shuffle_v4p0_v3p0__2_1_1_1() {
}
define void @s_shuffle_v4p0_v3p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_1_1_1:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -10269,12 +10447,9 @@ define void @s_shuffle_v4p0_v3p0__4_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10332,12 +10507,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10396,13 +10568,11 @@ define void @s_shuffle_v4p0_v3p0__5_u_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10465,15 +10635,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10531,14 +10698,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10601,15 +10764,12 @@ define void @s_shuffle_v4p0_v3p0__5_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10671,12 +10831,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10742,14 +10899,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10811,12 +10964,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10882,14 +11032,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -10951,14 +11097,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11020,12 +11162,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11091,14 +11230,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11111,20 +11246,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_1() {
}
define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11132,20 +11295,48 @@ define void @s_shuffle_v4p0_v3p0__u_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__0_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11153,22 +11344,53 @@ define void @s_shuffle_v4p0_v3p0__0_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__1_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11176,22 +11398,53 @@ define void @s_shuffle_v4p0_v3p0__1_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__2_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11199,20 +11452,48 @@ define void @s_shuffle_v4p0_v3p0__2_2_2_2() {
}
define void @s_shuffle_v4p0_v3p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_2_2_2:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -11269,12 +11550,9 @@ define void @s_shuffle_v4p0_v3p0__4_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11332,12 +11610,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11391,10 +11666,8 @@ define void @s_shuffle_v4p0_v3p0__5_u_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11456,14 +11729,10 @@ define void @s_shuffle_v4p0_v3p0__5_0_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11517,10 +11786,8 @@ define void @s_shuffle_v4p0_v3p0__5_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11578,12 +11845,9 @@ define void @s_shuffle_v4p0_v3p0__5_3_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11645,12 +11909,9 @@ define void @s_shuffle_v4p0_v3p0__5_4_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11712,12 +11973,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11779,12 +12037,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11846,14 +12101,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11919,14 +12170,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -11992,12 +12239,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12063,14 +12307,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12169,8 +12409,7 @@ define void @s_shuffle_v4p0_v3p0__1_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12210,8 +12449,7 @@ define void @s_shuffle_v4p0_v3p0__2_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12281,14 +12519,10 @@ define void @s_shuffle_v4p0_v3p0__4_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12341,14 +12575,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12397,12 +12627,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12465,15 +12692,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12531,12 +12755,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12598,14 +12819,10 @@ define void @s_shuffle_v4p0_v3p0__5_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12658,14 +12875,10 @@ define void @s_shuffle_v4p0_v3p0__5_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12722,14 +12935,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12782,12 +12991,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12853,14 +13059,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12926,14 +13128,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[16:21]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s20
-; GFX942-NEXT: s_mov_b32 s9, s21
-; GFX942-NEXT: s_mov_b32 s10, s20
-; GFX942-NEXT: s_mov_b32 s11, s21
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[20:21]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -12995,12 +13193,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13057,14 +13252,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13077,20 +13268,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_3() {
}
define void @s_shuffle_v4p0_v3p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
@@ -13144,12 +13363,9 @@ define void @s_shuffle_v4p0_v3p0__0_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13211,12 +13427,9 @@ define void @s_shuffle_v4p0_v3p0__1_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13278,12 +13491,9 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13296,20 +13506,48 @@ define void @s_shuffle_v4p0_v3p0__2_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
@@ -13318,22 +13556,53 @@ define void @s_shuffle_v4p0_v3p0__3_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
@@ -13342,22 +13611,53 @@ define void @s_shuffle_v4p0_v3p0__4_4_4_4() {
}
define void @s_shuffle_v4p0_v3p0__5_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
@@ -13402,12 +13702,9 @@ define void @s_shuffle_v4p0_v3p0__5_u_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13466,15 +13763,12 @@ define void @s_shuffle_v4p0_v3p0__5_0_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13532,12 +13826,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13599,12 +13890,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13657,14 +13945,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13717,14 +14001,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13773,12 +14053,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13840,12 +14117,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13907,12 +14181,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13974,12 +14245,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14028,14 +14296,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14048,20 +14312,48 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_4() {
}
define void @s_shuffle_v4p0_v3p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
@@ -14119,12 +14411,9 @@ define void @s_shuffle_v4p0_v3p0__0_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14186,12 +14475,9 @@ define void @s_shuffle_v4p0_v3p0__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14253,12 +14539,9 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14271,20 +14554,48 @@ define void @s_shuffle_v4p0_v3p0__2_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
@@ -14293,22 +14604,53 @@ define void @s_shuffle_v4p0_v3p0__3_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
@@ -14317,20 +14659,48 @@ define void @s_shuffle_v4p0_v3p0__4_5_5_5() {
}
define void @s_shuffle_v4p0_v3p0__5_u_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_u_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
@@ -14388,12 +14758,9 @@ define void @s_shuffle_v4p0_v3p0__5_0_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14455,12 +14822,9 @@ define void @s_shuffle_v4p0_v3p0__5_1_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14522,12 +14886,9 @@ define void @s_shuffle_v4p0_v3p0__5_2_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s12
-; GFX942-NEXT: s_mov_b32 s9, s13
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14584,14 +14945,10 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14604,20 +14961,48 @@ define void @s_shuffle_v4p0_v3p0__5_3_5_5() {
}
define void @s_shuffle_v4p0_v3p0__5_4_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:13]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:13]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:13]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v3p0__5_4_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:13]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <3 x ptr> asm "; def $0", "=s"()
%vec1 = call <3 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <3 x ptr> %vec0, <3 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
@@ -14666,12 +15051,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_u_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14737,14 +15119,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_0_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14810,14 +15188,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_1_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s16
-; GFX942-NEXT: s_mov_b32 s9, s17
-; GFX942-NEXT: s_mov_b32 s10, s16
-; GFX942-NEXT: s_mov_b32 s11, s17
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14879,12 +15253,9 @@ define void @s_shuffle_v4p0_v3p0__5_5_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14937,14 +15308,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_3_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15001,14 +15368,10 @@ define void @s_shuffle_v4p0_v3p0__5_5_4_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:5]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
diff --git a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
index ce1c54129f706..8a73e474f4ff8 100644
--- a/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
+++ b/llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v4p0.ll
@@ -87,6 +87,7 @@ define void @v_shuffle_v4p0_v4p0__1_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -126,6 +127,7 @@ define void @v_shuffle_v4p0_v4p0__2_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -165,10 +167,12 @@ define void @v_shuffle_v4p0_v4p0__3_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -221,6 +225,7 @@ define void @v_shuffle_v4p0_v4p0__5_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -261,6 +266,7 @@ define void @v_shuffle_v4p0_v4p0__6_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -301,10 +307,12 @@ define void @v_shuffle_v4p0_v4p0__7_u_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_u_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -364,11 +372,11 @@ define void @v_shuffle_v4p0_v4p0__7_0_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[2:9]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v8
-; GFX942-NEXT: v_mov_b32_e32 v3, v9
+; GFX942-NEXT: global_store_dwordx4 v10, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v8
+; GFX942-NEXT: v_mov_b32_e32 v3, v9
; GFX942-NEXT: global_store_dwordx4 v10, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -415,14 +423,15 @@ define void @v_shuffle_v4p0_v4p0__7_1_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_1_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v12, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v12, 0
+; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v10
; GFX942-NEXT: v_mov_b32_e32 v1, v11
; GFX942-NEXT: global_store_dwordx4 v12, v[0:3], s[0:1]
@@ -471,14 +480,15 @@ define void @v_shuffle_v4p0_v4p0__7_2_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_2_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v14, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v14, 0
+; GFX942-NEXT: global_store_dwordx4 v14, v[0:3], s[0:1] offset:16
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[6:13]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v12
; GFX942-NEXT: v_mov_b32_e32 v3, v13
; GFX942-NEXT: global_store_dwordx4 v14, v[2:5], s[0:1]
@@ -534,7 +544,7 @@ define void @v_shuffle_v4p0_v4p0__7_3_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v14
; GFX942-NEXT: v_mov_b32_e32 v5, v15
; GFX942-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1]
@@ -581,14 +591,15 @@ define void @v_shuffle_v4p0_v4p0__7_4_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_4_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v6
-; GFX942-NEXT: v_mov_b32_e32 v3, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: v_mov_b32_e32 v4, v0
; GFX942-NEXT: v_mov_b32_e32 v5, v1
+; GFX942-NEXT: v_mov_b32_e32 v2, v6
+; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -629,10 +640,12 @@ define void @v_shuffle_v4p0_v4p0__7_5_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_5_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -675,10 +688,12 @@ define void @v_shuffle_v4p0_v4p0__7_6_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__7_6_u_u:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v2, v6
; GFX942-NEXT: v_mov_b32_e32 v3, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
@@ -727,6 +742,7 @@ define void @v_shuffle_v4p0_v4p0__7_7_u_u(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: v_mov_b32_e32 v4, v6
; GFX942-NEXT: v_mov_b32_e32 v5, v7
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7703,6 +7719,7 @@ define void @v_shuffle_v4p0_v4p0__1_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[2:5], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7742,6 +7759,7 @@ define void @v_shuffle_v4p0_v4p0__2_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
; GFX942-NEXT: global_store_dwordx4 v8, v[4:7], s[0:1]
; GFX942-NEXT: s_waitcnt vmcnt(0)
; GFX942-NEXT: s_setpc_b64 s[30:31]
@@ -7781,10 +7799,12 @@ define void @v_shuffle_v4p0_v4p0__3_4_4_4(ptr addrspace(1) inreg %ptr) {
; GFX942-LABEL: v_shuffle_v4p0_v4p0__3_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: v_mov_b32_e32 v8, 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def v[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: v_mov_b32_e32 v8, 0
+; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16
+; GFX942-NEXT: s_nop 1
; GFX942-NEXT: v_mov_b32_e32 v0, v6
; GFX942-NEXT: v_mov_b32_e32 v1, v7
; GFX942-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1]
@@ -13289,8 +13309,7 @@ define void @s_shuffle_v4p0_v4p0__1_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13330,8 +13349,7 @@ define void @s_shuffle_v4p0_v4p0__2_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13375,8 +13393,7 @@ define void @s_shuffle_v4p0_v4p0__3_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13434,8 +13451,7 @@ define void @s_shuffle_v4p0_v4p0__5_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13476,8 +13492,7 @@ define void @s_shuffle_v4p0_v4p0__6_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13522,8 +13537,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13582,10 +13596,8 @@ define void @s_shuffle_v4p0_v4p0__7_0_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13639,8 +13651,7 @@ define void @s_shuffle_v4p0_v4p0__7_1_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13698,10 +13709,8 @@ define void @s_shuffle_v4p0_v4p0__7_2_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13755,10 +13764,8 @@ define void @s_shuffle_v4p0_v4p0__7_3_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13807,10 +13814,8 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13823,18 +13828,43 @@ define void @s_shuffle_v4p0_v4p0__7_4_u_u() {
}
define void @s_shuffle_v4p0_v4p0__7_5_u_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_u_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
@@ -13879,10 +13909,8 @@ define void @s_shuffle_v4p0_v4p0__7_6_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13927,10 +13955,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -13989,13 +14015,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14054,13 +14078,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14114,10 +14136,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14175,12 +14195,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14229,12 +14246,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14283,12 +14297,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14301,20 +14312,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_u() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_u() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_u:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
@@ -14363,12 +14402,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_u() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14434,14 +14470,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14507,14 +14539,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14580,14 +14608,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14649,12 +14673,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14711,14 +14732,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14771,14 +14788,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14835,14 +14848,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14855,22 +14864,53 @@ define void @s_shuffle_v4p0_v4p0__7_7_7_6() {
}
define void @s_shuffle_v4p0_v4p0__7_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
@@ -14919,12 +14959,9 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -14936,22 +14973,53 @@ define void @s_shuffle_v4p0_v4p0__u_0_0_0() {
}
define void @s_shuffle_v4p0_v4p0__0_0_0_0() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s8
-; GFX9-NEXT: s_mov_b32 s11, s9
-; GFX9-NEXT: s_mov_b32 s12, s8
-; GFX9-NEXT: s_mov_b32 s13, s9
-; GFX9-NEXT: s_mov_b32 s14, s8
-; GFX9-NEXT: s_mov_b32 s15, s9
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s8
+; GFX900-NEXT: s_mov_b32 s11, s9
+; GFX900-NEXT: s_mov_b32 s12, s8
+; GFX900-NEXT: s_mov_b32 s13, s9
+; GFX900-NEXT: s_mov_b32 s14, s8
+; GFX900-NEXT: s_mov_b32 s15, s9
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s8
+; GFX90A-NEXT: s_mov_b32 s11, s9
+; GFX90A-NEXT: s_mov_b32 s12, s8
+; GFX90A-NEXT: s_mov_b32 s13, s9
+; GFX90A-NEXT: s_mov_b32 s14, s8
+; GFX90A-NEXT: s_mov_b32 s15, s9
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_0_0_0:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[8:9]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[8:9]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> zeroinitializer
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
@@ -15003,14 +15071,10 @@ define void @s_shuffle_v4p0_v4p0__1_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15062,14 +15126,10 @@ define void @s_shuffle_v4p0_v4p0__2_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15125,14 +15185,10 @@ define void @s_shuffle_v4p0_v4p0__3_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15184,12 +15240,9 @@ define void @s_shuffle_v4p0_v4p0__4_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15251,17 +15304,13 @@ define void @s_shuffle_v4p0_v4p0__5_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15324,15 +15373,12 @@ define void @s_shuffle_v4p0_v4p0__6_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15395,17 +15441,13 @@ define void @s_shuffle_v4p0_v4p0__7_0_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15464,15 +15506,12 @@ define void @s_shuffle_v4p0_v4p0__7_u_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15535,17 +15574,13 @@ define void @s_shuffle_v4p0_v4p0__7_1_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15611,14 +15646,10 @@ define void @s_shuffle_v4p0_v4p0__7_2_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15680,14 +15711,10 @@ define void @s_shuffle_v4p0_v4p0__7_3_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15750,17 +15777,13 @@ define void @s_shuffle_v4p0_v4p0__7_4_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15822,12 +15845,9 @@ define void @s_shuffle_v4p0_v4p0__7_5_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15893,14 +15913,10 @@ define void @s_shuffle_v4p0_v4p0__7_6_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -15963,15 +15979,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16030,13 +16043,11 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16099,15 +16110,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16169,14 +16177,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16238,14 +16242,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16304,15 +16304,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16375,15 +16372,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s0
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16445,12 +16439,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -16463,149 +16454,12 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_0() {
}
define void @s_shuffle_v4p0_v4p0__u_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_1_1_1() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16615,17 +16469,12 @@ define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16635,44 +16484,31 @@ define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
+define void @s_shuffle_v4p0_v4p0__0_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16682,17 +16518,12 @@ define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16702,44 +16533,33 @@ define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
+define void @s_shuffle_v4p0_v4p0__1_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s10
; GFX900-NEXT: s_mov_b32 s13, s11
; GFX900-NEXT: s_mov_b32 s14, s10
@@ -16749,17 +16569,14 @@ define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s10
; GFX90A-NEXT: s_mov_b32 s13, s11
; GFX90A-NEXT: s_mov_b32 s14, s10
@@ -16769,283 +16586,224 @@ define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
+define void @s_shuffle_v4p0_v4p0__2_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
+define void @s_shuffle_v4p0_v4p0__3_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
+define void @s_shuffle_v4p0_v4p0__4_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
+define void @s_shuffle_v4p0_v4p0__5_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17054,140 +16812,126 @@ define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
+define void @s_shuffle_v4p0_v4p0__6_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
+define void @s_shuffle_v4p0_v4p0__7_1_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17196,25 +16940,22 @@ define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
+define void @s_shuffle_v4p0_v4p0__7_u_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17225,8 +16966,6 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -17236,7 +16975,7 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17247,8 +16986,6 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -17258,36 +16995,31 @@ define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
+define void @s_shuffle_v4p0_v4p0__7_0_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17298,8 +17030,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s14
; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -17309,7 +17041,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17320,8 +17052,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s14
; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -17331,113 +17063,109 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
+define void @s_shuffle_v4p0_v4p0__7_2_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
+define void @s_shuffle_v4p0_v4p0__7_3_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17445,21 +17173,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17467,135 +17193,141 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
+define void @s_shuffle_v4p0_v4p0__7_4_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
+define void @s_shuffle_v4p0_v4p0__7_5_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -17604,39 +17336,36 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
+define void @s_shuffle_v4p0_v4p0__7_6_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17644,19 +17373,21 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17664,48 +17395,46 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+define void @s_shuffle_v4p0_v4p0__7_7_1_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -17713,21 +17442,21 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -17735,34 +17464,31 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s14, s2
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
+define void @s_shuffle_v4p0_v4p0__7_7_u_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -17782,7 +17508,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -17802,269 +17528,134 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_0_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_2_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18073,65 +17664,59 @@ define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_3_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18140,195 +17725,195 @@ define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_4_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_5_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+define void @s_shuffle_v4p0_v4p0__7_7_6_1() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s14, s12
-; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s14, s12
-; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_1:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18337,35 +17922,29 @@ define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+define void @s_shuffle_v4p0_v4p0__u_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18373,19 +17952,14 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18393,46 +17967,33 @@ define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+define void @s_shuffle_v4p0_v4p0__0_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18440,19 +18001,14 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18460,113 +18016,89 @@ define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+define void @s_shuffle_v4p0_v4p0__1_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+define void @s_shuffle_v4p0_v4p0__2_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18574,19 +18106,16 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18594,44 +18123,36 @@ define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+define void @s_shuffle_v4p0_v4p0__3_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18639,17 +18160,16 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18657,44 +18177,34 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+define void @s_shuffle_v4p0_v4p0__4_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
@@ -18702,17 +18212,14 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
@@ -18720,70 +18227,66 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+define void @s_shuffle_v4p0_v4p0__5_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18792,67 +18295,58 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+define void @s_shuffle_v4p0_v4p0__6_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18861,67 +18355,62 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+define void @s_shuffle_v4p0_v4p0__7_2_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -18930,94 +18419,81 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+define void @s_shuffle_v4p0_v4p0__7_u_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+define void @s_shuffle_v4p0_v4p0__7_0_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19028,8 +18504,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: s_mov_b32 s14, s16
; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
@@ -19037,7 +18515,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19048,8 +18526,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: s_mov_b32 s14, s16
; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
@@ -19057,76 +18537,68 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+define void @s_shuffle_v4p0_v4p0__7_1_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19135,157 +18607,21 @@ define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_3_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19294,18 +18630,18 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19314,18 +18650,18 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19334,25 +18670,22 @@ define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_4_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -19361,16 +18694,18 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -19379,16 +18714,18 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19397,65 +18734,62 @@ define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+define void @s_shuffle_v4p0_v4p0__7_5_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19464,61 +18798,62 @@ define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+define void @s_shuffle_v4p0_v4p0__7_6_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[16:23]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[16:23]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19527,67 +18862,58 @@ define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_2_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s18
-; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s18
-; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19596,63 +18922,58 @@ define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_u_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19661,63 +18982,58 @@ define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_0_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19726,65 +19042,63 @@ define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_1_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19793,65 +19107,63 @@ define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_3_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s12, s18
; GFX900-NEXT: s_mov_b32 s13, s19
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s12, s18
; GFX90A-NEXT: s_mov_b32 s13, s19
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -19860,187 +19172,192 @@ define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_4_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+define void @s_shuffle_v4p0_v4p0__7_7_5_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+define void @s_shuffle_v4p0_v4p0__7_7_6_2() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_2:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -20049,926 +19366,898 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+define void @s_shuffle_v4p0_v4p0__u_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+define void @s_shuffle_v4p0_v4p0__0_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+define void @s_shuffle_v4p0_v4p0__1_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+define void @s_shuffle_v4p0_v4p0__2_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+define void @s_shuffle_v4p0_v4p0__3_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+define void @s_shuffle_v4p0_v4p0__4_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s14
-; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s14
; GFX900-NEXT: s_mov_b32 s11, s15
-; GFX900-NEXT: s_mov_b32 s14, s18
-; GFX900-NEXT: s_mov_b32 s15, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s14
-; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s14
; GFX90A-NEXT: s_mov_b32 s11, s15
-; GFX90A-NEXT: s_mov_b32 s14, s18
-; GFX90A-NEXT: s_mov_b32 s15, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+define void @s_shuffle_v4p0_v4p0__5_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+define void @s_shuffle_v4p0_v4p0__6_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_3_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_u_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_0_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_1_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+define void @s_shuffle_v4p0_v4p0__7_2_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+define void @s_shuffle_v4p0_v4p0__7_4_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+define void @s_shuffle_v4p0_v4p0__7_5_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s18
+; GFX900-NEXT: s_mov_b32 s13, s19
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s18
+; GFX90A-NEXT: s_mov_b32 s13, s19
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+define void @s_shuffle_v4p0_v4p0__7_6_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -20979,16 +20268,16 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s22
; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -20999,16 +20288,16 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s22
; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21017,25 +20306,22 @@ define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_3_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -21046,18 +20332,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21068,649 +20350,609 @@ define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_u_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s16
-; GFX942-NEXT: s_mov_b32 s13, s17
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_0_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s6
-; GFX900-NEXT: s_mov_b32 s11, s7
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s6
-; GFX90A-NEXT: s_mov_b32 s11, s7
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_1_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s20
-; GFX900-NEXT: s_mov_b32 s11, s21
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s20
-; GFX90A-NEXT: s_mov_b32 s11, s21
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+define void @s_shuffle_v4p0_v4p0__7_7_2_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+define void @s_shuffle_v4p0_v4p0__7_7_4_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+define void @s_shuffle_v4p0_v4p0__7_7_5_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+define void @s_shuffle_v4p0_v4p0__7_7_6_3() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s18
+; GFX900-NEXT: s_mov_b32 s15, s19
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s18
+; GFX90A-NEXT: s_mov_b32 s15, s19
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_3:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+define void @s_shuffle_v4p0_v4p0__u_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+define void @s_shuffle_v4p0_v4p0__1_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s4
-; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[16:23]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s22
-; GFX942-NEXT: s_mov_b32 s9, s23
-; GFX942-NEXT: s_mov_b32 s10, s22
-; GFX942-NEXT: s_mov_b32 s11, s23
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+define void @s_shuffle_v4p0_v4p0__2_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s4
-; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -21718,8 +20960,74 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_4_4_4() {
+; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_4_4_4:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: ;;#ASMSTART
+; GFX9-NEXT: ; use s[8:15]
+; GFX9-NEXT: ;;#ASMEND
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s4
; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
@@ -21727,129 +21035,1656 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_4_4_4:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+define void @s_shuffle_v4p0_v4p0__6_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s22
-; GFX900-NEXT: s_mov_b32 s9, s23
-; GFX900-NEXT: s_mov_b32 s10, s22
-; GFX900-NEXT: s_mov_b32 s11, s23
-; GFX900-NEXT: s_mov_b32 s12, s20
-; GFX900-NEXT: s_mov_b32 s13, s21
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s22
-; GFX90A-NEXT: s_mov_b32 s9, s23
-; GFX90A-NEXT: s_mov_b32 s10, s22
-; GFX90A-NEXT: s_mov_b32 s11, s23
-; GFX90A-NEXT: s_mov_b32 s12, s20
-; GFX90A-NEXT: s_mov_b32 s13, s21
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_4_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_0_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_1_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_2_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_3_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[16:17]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_5_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s6
+; GFX900-NEXT: s_mov_b32 s11, s7
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s6
+; GFX90A-NEXT: s_mov_b32 s11, s7
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_6_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s20
+; GFX900-NEXT: s_mov_b32 s11, s21
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s20
+; GFX90A-NEXT: s_mov_b32 s11, s21
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_4_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_u_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_0_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_1_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_2_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_3_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[16:23]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[22:23]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_5_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s4
+; GFX900-NEXT: s_mov_b32 s15, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s4
+; GFX90A-NEXT: s_mov_b32 s15, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_7_6_4() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[16:23]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s22
+; GFX900-NEXT: s_mov_b32 s9, s23
+; GFX900-NEXT: s_mov_b32 s10, s22
+; GFX900-NEXT: s_mov_b32 s11, s23
+; GFX900-NEXT: s_mov_b32 s12, s20
+; GFX900-NEXT: s_mov_b32 s13, s21
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[16:23]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s22
+; GFX90A-NEXT: s_mov_b32 s9, s23
+; GFX90A-NEXT: s_mov_b32 s10, s22
+; GFX90A-NEXT: s_mov_b32 s11, s23
+; GFX90A-NEXT: s_mov_b32 s12, s20
+; GFX90A-NEXT: s_mov_b32 s13, s21
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_4:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[0:1]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__u_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s10
+; GFX900-NEXT: s_mov_b32 s13, s11
+; GFX900-NEXT: s_mov_b32 s14, s10
+; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s10
+; GFX90A-NEXT: s_mov_b32 s13, s11
+; GFX90A-NEXT: s_mov_b32 s14, s10
+; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s0
-; GFX942-NEXT: s_mov_b32 s15, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s14
-; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -21857,17 +22692,19 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s14
-; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -21875,74 +22712,69 @@ define void @s_shuffle_v4p0_v4p0__0_5_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_nop 0
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s2
-; GFX942-NEXT: s_mov_b32 s11, s3
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -21951,253 +22783,150 @@ define void @s_shuffle_v4p0_v4p0__1_5_5_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_5_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s10
-; GFX900-NEXT: s_mov_b32 s13, s11
-; GFX900-NEXT: s_mov_b32 s14, s10
-; GFX900-NEXT: s_mov_b32 s15, s11
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s10
-; GFX90A-NEXT: s_mov_b32 s13, s11
-; GFX90A-NEXT: s_mov_b32 s14, s10
-; GFX90A-NEXT: s_mov_b32 s15, s11
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_5_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ; def s[12:19]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s10
-; GFX942-NEXT: s_mov_b32 s13, s11
-; GFX942-NEXT: s_mov_b32 s14, s10
-; GFX942-NEXT: s_mov_b32 s15, s11
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_5_5_5() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_5_5:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s10
-; GFX9-NEXT: s_mov_b32 s13, s11
-; GFX9-NEXT: s_mov_b32 s14, s10
-; GFX9-NEXT: s_mov_b32 s15, s11
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22205,6 +22934,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s4
+; GFX900-NEXT: s_mov_b32 s11, s5
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
@@ -22214,7 +22945,7 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22222,6 +22953,8 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s4
+; GFX90A-NEXT: s_mov_b32 s11, s5
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
@@ -22231,43 +22964,38 @@ define void @s_shuffle_v4p0_v4p0__7_u_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
; GFX900-NEXT: s_mov_b32 s12, s14
; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: ;;#ASMSTART
@@ -22275,19 +23003,16 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
; GFX90A-NEXT: s_mov_b32 s12, s14
; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: ;;#ASMSTART
@@ -22295,114 +23020,92 @@ define void @s_shuffle_v4p0_v4p0__7_0_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_0_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_nop 0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_1_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s6
+; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s6
+; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_1_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[8:15]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s12
-; GFX900-NEXT: s_mov_b32 s11, s13
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22410,21 +23113,14 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s12
-; GFX90A-NEXT: s_mov_b32 s11, s13
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22432,34 +23128,28 @@ define void @s_shuffle_v4p0_v4p0__7_2_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_2_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_0_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22470,14 +23160,16 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22488,174 +23180,176 @@ define void @s_shuffle_v4p0_v4p0__7_3_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_3_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ; def s[4:11]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_4_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s10, s4
-; GFX900-NEXT: s_mov_b32 s11, s5
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s18
+; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s10, s18
+; GFX900-NEXT: s_mov_b32 s11, s19
; GFX900-NEXT: s_mov_b32 s12, s6
; GFX900-NEXT: s_mov_b32 s13, s7
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s10, s4
-; GFX90A-NEXT: s_mov_b32 s11, s5
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s18
+; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s10, s18
+; GFX90A-NEXT: s_mov_b32 s11, s19
; GFX90A-NEXT: s_mov_b32 s12, s6
; GFX90A-NEXT: s_mov_b32 s13, s7
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_4_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_6_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s14, s6
+; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s14, s6
+; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22663,16 +23357,19 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22680,33 +23377,31 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_5_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+define void @s_shuffle_v4p0_v4p0__7_7_4_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
@@ -22714,6 +23409,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s10
; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s12, s4
+; GFX900-NEXT: s_mov_b32 s13, s5
; GFX900-NEXT: s_mov_b32 s14, s6
; GFX900-NEXT: s_mov_b32 s15, s7
; GFX900-NEXT: ;;#ASMSTART
@@ -22721,7 +23418,7 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
@@ -22729,6 +23426,8 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s10
; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s12, s4
+; GFX90A-NEXT: s_mov_b32 s13, s5
; GFX90A-NEXT: s_mov_b32 s14, s6
; GFX90A-NEXT: s_mov_b32 s15, s7
; GFX90A-NEXT: ;;#ASMSTART
@@ -22736,201 +23435,175 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_5() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_0_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
; GFX900-NEXT: s_mov_b32 s10, s18
; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
; GFX90A-NEXT: s_mov_b32 s10, s18
; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_0_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_1_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+define void @s_shuffle_v4p0_v4p0__u_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s6
-; GFX900-NEXT: s_mov_b32 s13, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s6
-; GFX90A-NEXT: s_mov_b32 s13, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_1_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[4:11]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s10
-; GFX942-NEXT: s_mov_b32 s9, s11
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+define void @s_shuffle_v4p0_v4p0__0_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s10, s16
+; GFX900-NEXT: s_mov_b32 s11, s17
+; GFX900-NEXT: s_mov_b32 s12, s16
+; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s14, s16
+; GFX900-NEXT: s_mov_b32 s15, s17
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s10, s16
+; GFX90A-NEXT: s_mov_b32 s11, s17
+; GFX90A-NEXT: s_mov_b32 s12, s16
+; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s14, s16
+; GFX90A-NEXT: s_mov_b32 s15, s17
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_2_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
@@ -22939,311 +23612,271 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_5() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_3_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ; def s[4:11]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s14
-; GFX900-NEXT: s_mov_b32 s13, s15
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: s_mov_b32 s8, s6
+; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ; def s[4:11]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s14
-; GFX90A-NEXT: s_mov_b32 s13, s15
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: s_mov_b32 s8, s6
+; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_3_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[12:19]
+; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_4_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
+; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s10
-; GFX900-NEXT: s_mov_b32 s9, s11
-; GFX900-NEXT: s_mov_b32 s12, s4
-; GFX900-NEXT: s_mov_b32 s13, s5
-; GFX900-NEXT: s_mov_b32 s14, s6
-; GFX900-NEXT: s_mov_b32 s15, s7
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s16
+; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
+; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s10
-; GFX90A-NEXT: s_mov_b32 s9, s11
-; GFX90A-NEXT: s_mov_b32 s12, s4
-; GFX90A-NEXT: s_mov_b32 s13, s5
-; GFX90A-NEXT: s_mov_b32 s14, s6
-; GFX90A-NEXT: s_mov_b32 s15, s7
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s16
+; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_4_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__7_7_6_5() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[12:19]
; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s18
; GFX900-NEXT: s_mov_b32 s9, s19
-; GFX900-NEXT: s_mov_b32 s10, s18
-; GFX900-NEXT: s_mov_b32 s11, s19
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[12:19]
; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s18
; GFX90A-NEXT: s_mov_b32 s9, s19
-; GFX90A-NEXT: s_mov_b32 s10, s18
-; GFX90A-NEXT: s_mov_b32 s11, s19
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_5:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s2
-; GFX942-NEXT: s_mov_b32 s15, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__u_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__0_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+define void @s_shuffle_v4p0_v4p0__4_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s10, s16
-; GFX900-NEXT: s_mov_b32 s11, s17
-; GFX900-NEXT: s_mov_b32 s12, s16
-; GFX900-NEXT: s_mov_b32 s13, s17
-; GFX900-NEXT: s_mov_b32 s14, s16
-; GFX900-NEXT: s_mov_b32 s15, s17
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s[8:15]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s10, s16
-; GFX90A-NEXT: s_mov_b32 s11, s17
-; GFX90A-NEXT: s_mov_b32 s12, s16
-; GFX90A-NEXT: s_mov_b32 s13, s17
-; GFX90A-NEXT: s_mov_b32 s14, s16
-; GFX90A-NEXT: s_mov_b32 s15, s17
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s[8:15]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__0_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+define void @s_shuffle_v4p0_v4p0__5_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[4:11]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s6
-; GFX900-NEXT: s_mov_b32 s9, s7
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23253,17 +23886,14 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[4:11]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s6
-; GFX90A-NEXT: s_mov_b32 s9, s7
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23273,44 +23903,35 @@ define void @s_shuffle_v4p0_v4p0__1_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__1_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+define void @s_shuffle_v4p0_v4p0__6_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s16
-; GFX900-NEXT: s_mov_b32 s9, s17
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23320,17 +23941,14 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s16
-; GFX90A-NEXT: s_mov_b32 s9, s17
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23340,44 +23958,35 @@ define void @s_shuffle_v4p0_v4p0__2_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__2_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
ret void
}
-define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
-; GFX900-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
-; GFX900-NEXT: ; def s[12:19]
-; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:15]
; GFX900-NEXT: ;;#ASMEND
-; GFX900-NEXT: s_mov_b32 s8, s18
-; GFX900-NEXT: s_mov_b32 s9, s19
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
; GFX900-NEXT: s_mov_b32 s10, s12
; GFX900-NEXT: s_mov_b32 s11, s13
; GFX900-NEXT: s_mov_b32 s14, s12
@@ -23387,17 +23996,14 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
-; GFX90A-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def s[12:19]
-; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:15]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: s_mov_b32 s8, s18
-; GFX90A-NEXT: s_mov_b32 s9, s19
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
; GFX90A-NEXT: s_mov_b32 s10, s12
; GFX90A-NEXT: s_mov_b32 s11, s13
; GFX90A-NEXT: s_mov_b32 s14, s12
@@ -23407,119 +24013,19 @@ define void @s_shuffle_v4p0_v4p0__3_6_6_6() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_6_6_6:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s12
-; GFX942-NEXT: s_mov_b32 s11, s13
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
@@ -23528,20 +24034,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_u_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
@@ -23599,12 +24133,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23666,12 +24197,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23733,12 +24261,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23800,12 +24325,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s12
-; GFX942-NEXT: s_mov_b32 s15, s13
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23862,14 +24384,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s4
-; GFX942-NEXT: s_mov_b32 s13, s5
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -23882,20 +24400,48 @@ define void @s_shuffle_v4p0_v4p0__7_4_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_5_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
@@ -23904,22 +24450,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_6_6() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_6() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s14, s12
-; GFX9-NEXT: s_mov_b32 s15, s13
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s14, s12
+; GFX900-NEXT: s_mov_b32 s15, s13
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s14, s12
+; GFX90A-NEXT: s_mov_b32 s15, s13
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_6:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[12:13]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
@@ -23968,12 +24545,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_u_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24039,14 +24613,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24112,14 +24682,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24181,12 +24747,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24252,14 +24815,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s18
-; GFX942-NEXT: s_mov_b32 s9, s19
-; GFX942-NEXT: s_mov_b32 s10, s18
-; GFX942-NEXT: s_mov_b32 s11, s19
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s16
-; GFX942-NEXT: s_mov_b32 s15, s17
+; GFX942-NEXT: s_mov_b64 s[8:9], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[18:19]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[16:17]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24312,14 +24871,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24376,14 +24931,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s4
-; GFX942-NEXT: s_mov_b32 s15, s5
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[4:5]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24396,20 +24947,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_6() {
}
define void @s_shuffle_v4p0_v4p0__u_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__u_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
@@ -24467,12 +25046,9 @@ define void @s_shuffle_v4p0_v4p0__0_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24534,12 +25110,9 @@ define void @s_shuffle_v4p0_v4p0__1_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s2
-; GFX942-NEXT: s_mov_b32 s9, s3
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24601,12 +25174,9 @@ define void @s_shuffle_v4p0_v4p0__2_7_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s4
-; GFX942-NEXT: s_mov_b32 s9, s5
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24659,117 +25229,232 @@ define void @s_shuffle_v4p0_v4p0__3_7_7_7() {
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
-; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7:
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__3_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[0:7]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__4_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__5_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s10
+; GFX900-NEXT: s_mov_b32 s9, s11
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s10
+; GFX90A-NEXT: s_mov_b32 s9, s11
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[10:11]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__6_7_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s12
+; GFX900-NEXT: s_mov_b32 s9, s13
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s12
+; GFX90A-NEXT: s_mov_b32 s9, s13
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
+ %vec0 = call <4 x ptr> asm "; def $0", "=s"()
+ %vec1 = call <4 x ptr> asm "; def $0", "=s"()
+ %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
+ call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
+ ret void
+}
+
+define void @s_shuffle_v4p0_v4p0__7_u_7_7() {
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
; GFX942: ; %bb.0:
; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[8:15]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: ;;#ASMSTART
-; GFX942-NEXT: ; def s[0:7]
-; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
; GFX942-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__4_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__4_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__5_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__5_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s10
-; GFX9-NEXT: s_mov_b32 s9, s11
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__6_7_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__6_7_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s12
-; GFX9-NEXT: s_mov_b32 s9, s13
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
- %vec0 = call <4 x ptr> asm "; def $0", "=s"()
- %vec1 = call <4 x ptr> asm "; def $0", "=s"()
- %shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
- call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x ptr> %shuf)
- ret void
-}
-
-define void @s_shuffle_v4p0_v4p0__7_u_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_u_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
@@ -24827,12 +25512,9 @@ define void @s_shuffle_v4p0_v4p0__7_0_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24894,12 +25576,9 @@ define void @s_shuffle_v4p0_v4p0__7_1_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -24961,12 +25640,9 @@ define void @s_shuffle_v4p0_v4p0__7_2_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s4
-; GFX942-NEXT: s_mov_b32 s11, s5
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[4:5]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25028,12 +25704,9 @@ define void @s_shuffle_v4p0_v4p0__7_3_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s14
-; GFX942-NEXT: s_mov_b32 s13, s15
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25090,14 +25763,10 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s0
-; GFX942-NEXT: s_mov_b32 s11, s1
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25110,20 +25779,48 @@ define void @s_shuffle_v4p0_v4p0__7_4_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_5_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_5_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
@@ -25132,22 +25829,53 @@ define void @s_shuffle_v4p0_v4p0__7_5_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_6_7_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s12
-; GFX9-NEXT: s_mov_b32 s11, s13
-; GFX9-NEXT: s_mov_b32 s12, s14
-; GFX9-NEXT: s_mov_b32 s13, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s12
+; GFX900-NEXT: s_mov_b32 s11, s13
+; GFX900-NEXT: s_mov_b32 s12, s14
+; GFX900-NEXT: s_mov_b32 s13, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s12
+; GFX90A-NEXT: s_mov_b32 s11, s13
+; GFX90A-NEXT: s_mov_b32 s12, s14
+; GFX90A-NEXT: s_mov_b32 s13, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_6_7_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[12:13]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
@@ -25156,20 +25884,48 @@ define void @s_shuffle_v4p0_v4p0__7_6_7_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_u_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_u_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
@@ -25227,12 +25983,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_0_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25294,12 +26047,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_1_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25361,12 +26111,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_2_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25428,12 +26175,9 @@ define void @s_shuffle_v4p0_v4p0__7_7_3_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s14
-; GFX942-NEXT: s_mov_b32 s9, s15
-; GFX942-NEXT: s_mov_b32 s10, s14
-; GFX942-NEXT: s_mov_b32 s11, s15
-; GFX942-NEXT: s_mov_b32 s12, s6
-; GFX942-NEXT: s_mov_b32 s13, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25486,14 +26230,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_4_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s0
-; GFX942-NEXT: s_mov_b32 s13, s1
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[0:1]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25550,14 +26290,10 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() {
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; def s[0:7]
; GFX942-NEXT: ;;#ASMEND
-; GFX942-NEXT: s_mov_b32 s8, s6
-; GFX942-NEXT: s_mov_b32 s9, s7
-; GFX942-NEXT: s_mov_b32 s10, s6
-; GFX942-NEXT: s_mov_b32 s11, s7
-; GFX942-NEXT: s_mov_b32 s12, s2
-; GFX942-NEXT: s_mov_b32 s13, s3
-; GFX942-NEXT: s_mov_b32 s14, s6
-; GFX942-NEXT: s_mov_b32 s15, s7
+; GFX942-NEXT: s_mov_b64 s[8:9], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[6:7]
+; GFX942-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GFX942-NEXT: s_mov_b64 s[14:15], s[6:7]
; GFX942-NEXT: ;;#ASMSTART
; GFX942-NEXT: ; use s[8:15]
; GFX942-NEXT: ;;#ASMEND
@@ -25570,20 +26306,48 @@ define void @s_shuffle_v4p0_v4p0__7_7_5_7() {
}
define void @s_shuffle_v4p0_v4p0__7_7_6_7() {
-; GFX9-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; def s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_mov_b32 s8, s14
-; GFX9-NEXT: s_mov_b32 s9, s15
-; GFX9-NEXT: s_mov_b32 s10, s14
-; GFX9-NEXT: s_mov_b32 s11, s15
-; GFX9-NEXT: ;;#ASMSTART
-; GFX9-NEXT: ; use s[8:15]
-; GFX9-NEXT: ;;#ASMEND
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; def s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_mov_b32 s8, s14
+; GFX900-NEXT: s_mov_b32 s9, s15
+; GFX900-NEXT: s_mov_b32 s10, s14
+; GFX900-NEXT: s_mov_b32 s11, s15
+; GFX900-NEXT: ;;#ASMSTART
+; GFX900-NEXT: ; use s[8:15]
+; GFX900-NEXT: ;;#ASMEND
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX90A-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX90A: ; %bb.0:
+; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; def s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_mov_b32 s8, s14
+; GFX90A-NEXT: s_mov_b32 s9, s15
+; GFX90A-NEXT: s_mov_b32 s10, s14
+; GFX90A-NEXT: s_mov_b32 s11, s15
+; GFX90A-NEXT: ;;#ASMSTART
+; GFX90A-NEXT: ; use s[8:15]
+; GFX90A-NEXT: ;;#ASMEND
+; GFX90A-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX942-LABEL: s_shuffle_v4p0_v4p0__7_7_6_7:
+; GFX942: ; %bb.0:
+; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; def s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_mov_b64 s[8:9], s[14:15]
+; GFX942-NEXT: s_mov_b64 s[10:11], s[14:15]
+; GFX942-NEXT: ;;#ASMSTART
+; GFX942-NEXT: ; use s[8:15]
+; GFX942-NEXT: ;;#ASMEND
+; GFX942-NEXT: s_setpc_b64 s[30:31]
%vec0 = call <4 x ptr> asm "; def $0", "=s"()
%vec1 = call <4 x ptr> asm "; def $0", "=s"()
%shuf = shufflevector <4 x ptr> %vec0, <4 x ptr> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
index d8c015b85584a..f3a9b665e2d99 100644
--- a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
@@ -6,25 +6,22 @@ define protected amdgpu_kernel void @test(ptr addrspace(1) %in, ptr addrspace(1)
; GFX942-LABEL: test:
; GFX942: ; %bb.0: ; %entry
; GFX942-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
-; GFX942-NEXT: v_mov_b32_e32 v2, v0
-; GFX942-NEXT: v_mov_b32_e32 v3, v0
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: v_mov_b64_e32 v[4:5], 0
+; GFX942-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
+; GFX942-NEXT: v_mov_b32_e32 v10, 0
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
; GFX942-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0
-; GFX942-NEXT: v_mov_b64_e32 v[10:11], v[2:3]
-; GFX942-NEXT: v_mov_b64_e32 v[8:9], v[0:1]
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-NEXT: v_mov_b32_e32 v12, s4
-; GFX942-NEXT: v_mov_b32_e32 v13, s5
-; GFX942-NEXT: v_mov_b32_e32 v4, s6
-; GFX942-NEXT: v_mov_b32_e32 v5, s7
-; GFX942-NEXT: v_mov_b32_e32 v6, s7
-; GFX942-NEXT: v_mov_b32_e32 v7, s7
+; GFX942-NEXT: v_mov_b32_e32 v8, s4
+; GFX942-NEXT: v_mov_b32_e32 v9, s5
+; GFX942-NEXT: v_mov_b32_e32 v0, s6
+; GFX942-NEXT: v_mov_b32_e32 v1, s7
+; GFX942-NEXT: v_mov_b32_e32 v2, s7
+; GFX942-NEXT: v_mov_b32_e32 v3, s7
; GFX942-NEXT: s_nop 1
-; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[8:11], v[12:13], v[4:7], v13
+; GFX942-NEXT: v_smfmac_i32_16x16x64_i8 v[4:7], v[8:9], v[0:3], v9
; GFX942-NEXT: s_nop 6
-; GFX942-NEXT: global_store_dword v0, v11, s[2:3] offset:12
+; GFX942-NEXT: global_store_dword v10, v7, s[2:3] offset:12
; GFX942-NEXT: s_endpgm
entry:
%arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
index d500a3e50f9f7..995ebc822f769 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmax.ll
@@ -229,9 +229,10 @@ define half @test_vector_reduce_fmax_v3half(<3 x half> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_fmax_v3half:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
-; GFX9-SDAG-NEXT: s_movk_i32 s0, 0x7e00
-; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, s0
+; GFX9-SDAG-NEXT: s_movk_i32 s0, 0xfe00
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5040100
+; GFX9-SDAG-NEXT: v_perm_b32 v1, s0, v1, v2
+; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-SDAG-NEXT: s_nop 0
; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
index 56e7e045e40eb..fe06485749651 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-reduce-fmin.ll
@@ -229,9 +229,10 @@ define half @test_vector_reduce_fmin_v3half(<3 x half> %v) {
; GFX9-SDAG-LABEL: test_vector_reduce_fmin_v3half:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_max_f16_e32 v1, v1, v1
; GFX9-SDAG-NEXT: s_movk_i32 s0, 0x7e00
-; GFX9-SDAG-NEXT: v_pack_b32_f16 v1, v1, s0
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, 0x5040100
+; GFX9-SDAG-NEXT: v_perm_b32 v1, s0, v1, v2
+; GFX9-SDAG-NEXT: v_pk_max_f16 v1, v1, v1
; GFX9-SDAG-NEXT: v_pk_max_f16 v0, v0, v0
; GFX9-SDAG-NEXT: s_nop 0
; GFX9-SDAG-NEXT: v_pk_min_f16 v0, v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index a401f989a2507..9867cd9495005 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -459,11 +459,10 @@ define amdgpu_kernel void @v8i8_phi_zeroinit(ptr addrspace(1) %src1, ptr addrspa
; GFX942-NEXT: ; %bb.1: ; %bb.1
; GFX942-NEXT: global_load_dwordx2 v[2:3], v5, s[10:11]
; GFX942-NEXT: v_cmp_gt_u32_e32 vcc, 7, v4
-; GFX942-NEXT: s_waitcnt vmcnt(1)
-; GFX942-NEXT: v_mov_b32_e32 v0, 0
; GFX942-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX942-NEXT: s_and_b64 s[4:5], vcc, exec
-; GFX942-NEXT: v_mov_b32_e32 v1, v0
+; GFX942-NEXT: s_waitcnt vmcnt(1)
+; GFX942-NEXT: v_mov_b64_e32 v[0:1], 0
; GFX942-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX942-NEXT: .LBB9_2: ; %Flow
; GFX942-NEXT: s_or_b64 exec, exec, s[2:3]
More information about the llvm-commits
mailing list