[llvm] 7818e5a - [LoongArch] lower vector shuffle to shift if possible (#132866)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 00:26:04 PDT 2025
Author: tangaac
Date: 2025-04-10T15:26:00+08:00
New Revision: 7818e5ab6725c1590ff5c4a483a76f08b8697cb7
URL: https://github.com/llvm/llvm-project/commit/7818e5ab6725c1590ff5c4a483a76f08b8697cb7
DIFF: https://github.com/llvm/llvm-project/commit/7818e5ab6725c1590ff5c4a483a76f08b8697cb7.diff
LOG: [LoongArch] lower vector shuffle to shift if possible (#132866)
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll
llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll
llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 900775eedfa7b..002d88cbeeba3 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -525,6 +525,121 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
}
}
+/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
+/// instructions.
+// The function matches elements from one of the input vectors shuffled to the
+// left or right with zeroable elements 'shifted in'. It handles both the
+// strictly bit-wise element shifts and the byte shift across an entire 128-bit
+// lane. Returns the shift amount (setting ShiftVT/Opcode), or -1 on no match.
+// Mostly copied from X86.
+static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
+ unsigned ScalarSizeInBits, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable) {
+ int Size = Mask.size();
+ unsigned SizeInBits = Size * ScalarSizeInBits;
+
+ auto CheckZeros = [&](int Shift, int Scale, bool Left) {
+ for (int i = 0; i < Size; i += Scale)
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
+ return false;
+
+ return true;
+ };
+
+ auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
+ int Step = 1) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
+ if (!(Mask[i] == -1 || Mask[i] == Low))
+ return false;
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, int Scale, bool Left) {
+ for (int i = 0; i != Size; i += Scale) {
+ unsigned Pos = Left ? i + Shift : i;
+ unsigned Low = Left ? i : i + Shift;
+ unsigned Len = Scale - Shift;
+ if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
+ return -1;
+ }
+
+ int ShiftEltBits = ScalarSizeInBits * Scale;
+ bool ByteShift = ShiftEltBits > 64;
+ Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
+ : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
+ int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
+
+ // Normalize the scale for byte shifts to still produce an i64 element
+ // type.
+ Scale = ByteShift ? Scale / 2 : Scale;
+
+ // We need to round trip through the appropriate type for the shift.
+ MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
+ ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
+ : MVT::getVectorVT(ShiftSVT, Size / Scale);
+ return (int)ShiftAmt;
+ };
+
+ unsigned MaxWidth = 128;
+ for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
+ for (int Shift = 1; Shift != Scale; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Scale, Left)) {
+ int ShiftAmt = MatchShift(Shift, Scale, Left);
+ if (0 < ShiftAmt)
+ return ShiftAmt;
+ }
+
+ // No match.
+ return -1;
+}
+
+/// Lower VECTOR_SHUFFLE as shift (if possible).
+///
+/// For example:
+/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
+/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+/// is lowered to:
+/// (VBSLL_V $v0, $v0, 4)
+///
+/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
+/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
+/// is lowered to:
+/// (VSLLI_D $v0, $v0, 32)
+static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ SelectionDAG &DAG,
+ const APInt &Zeroable) {
+ int Size = Mask.size();
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ MVT ShiftVT;
+ SDValue V = V1;
+ unsigned Opcode;
+
+ // Try to match the shuffle as a shift of V1 (mask offset 0).
+ int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, 0, Zeroable);
+
+ // If V1 failed, try a shift of V2, whose mask indices are offset by Size.
+ if (ShiftAmt < 0) {
+ ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, Size, Zeroable);
+ V = V2;
+ }
+
+ if (ShiftAmt < 0)
+ return SDValue();
+
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
+ "Illegal integer vector type");
+ V = DAG.getBitcast(ShiftVT, V);
+ V = DAG.getNode(Opcode, DL, ShiftVT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i64));
+ return DAG.getBitcast(VT, V);
+}
+
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
@@ -593,14 +708,12 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
ArrayRef<int> Mask, MVT VT,
SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const APInt &Zeroable) {
int Bits = VT.getSizeInBits();
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
- APInt KnownUndef, KnownZero;
- computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
- APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnes())
return DAG.getConstant(0, DL, VT);
@@ -1062,6 +1175,10 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
"Unexpected mask size for shuffle!");
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+ APInt KnownUndef, KnownZero;
+ computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
+ APInt Zeroable = KnownUndef | KnownZero;
+
SDValue Result;
// TODO: Add more comparison patterns.
if (V2.isUndef()) {
@@ -1089,12 +1206,14 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
+ Zeroable)))
+ return Result;
if ((Result =
- lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG)))
+ lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
return Result;
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
return Result;
-
return SDValue();
}
@@ -1495,6 +1614,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
SmallVector<int> NewMask(Mask);
canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
+ APInt KnownUndef, KnownZero;
+ computeZeroableShuffleElements(NewMask, V1, V2, KnownUndef, KnownZero);
+ APInt Zeroable = KnownUndef | KnownZero;
+
SDValue Result;
// TODO: Add more comparison patterns.
if (V2.isUndef()) {
@@ -1522,6 +1645,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
return Result;
@@ -5041,6 +5167,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
+ NODE_NAME_CASE(VSLLI)
+ NODE_NAME_CASE(VSRLI)
+ NODE_NAME_CASE(VBSLL)
+ NODE_NAME_CASE(VBSRL)
}
#undef NODE_NAME_CASE
return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 002fad0e20759..52d88b9b24a6b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -147,7 +147,15 @@ enum NodeType : unsigned {
// Floating point approximate reciprocal operation
FRECIPE,
- FRSQRTE
+ FRSQRTE,
+
+ // Vector logical left / right shift by immediate
+ VSLLI,
+ VSRLI,
+
+ // Vector byte logical left / right shift
+ VBSLL,
+ VBSRL
// Intrinsic operations end =============================================
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 7022fddf34100..d6d532cddb594 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1187,7 +1187,7 @@ multiclass PatShiftXrXr<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
}
-multiclass PatShiftXrUimm<SDPatternOperator OpNode, string Inst> {
+multiclass PatShiftXrSplatUimm<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))),
(!cast<LAInst>(Inst#"_B") LASX256:$xj, uimm3:$imm)>;
def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))),
@@ -1198,6 +1198,17 @@ multiclass PatShiftXrUimm<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LASX256:$xj, uimm6:$imm)>;
}
+multiclass PatShiftXrUimm<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode(v32i8 LASX256:$vj), uimm3:$imm),
+ (!cast<LAInst>(Inst#"_B") LASX256:$vj, uimm3:$imm)>;
+ def : Pat<(OpNode(v16i16 LASX256:$vj), uimm4:$imm),
+ (!cast<LAInst>(Inst#"_H") LASX256:$vj, uimm4:$imm)>;
+ def : Pat<(OpNode(v8i32 LASX256:$vj), uimm5:$imm),
+ (!cast<LAInst>(Inst#"_W") LASX256:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode(v4i64 LASX256:$vj), uimm6:$imm),
+ (!cast<LAInst>(Inst#"_D") LASX256:$vj, uimm6:$imm)>;
+}
+
multiclass PatCCXrSimm5<CondCode CC, string Inst> {
def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj),
(v32i8 (SplatPat_simm5 simm5:$imm)), CC)),
@@ -1335,20 +1346,32 @@ def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
(XVXORI_B LASX256:$xj, uimm8:$imm)>;
+// XVBSLL_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32,
+ v4f64] in def : Pat<(loongarch_vbsll(vt LASX256:$xj), uimm5:$imm),
+ (XVBSLL_V LASX256:$xj, uimm5:$imm)>;
+
+// XVBSRL_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32,
+ v4f64] in def : Pat<(loongarch_vbsrl(vt LASX256:$xj), uimm5:$imm),
+ (XVBSRL_V LASX256:$xj, uimm5:$imm)>;
+
// XVSLL[I]_{B/H/W/D}
defm : PatXrXr<shl, "XVSLL">;
defm : PatShiftXrXr<shl, "XVSLL">;
-defm : PatShiftXrUimm<shl, "XVSLLI">;
+defm : PatShiftXrSplatUimm<shl, "XVSLLI">;
+defm : PatShiftXrUimm<loongarch_vslli, "XVSLLI">;
// XVSRL[I]_{B/H/W/D}
defm : PatXrXr<srl, "XVSRL">;
defm : PatShiftXrXr<srl, "XVSRL">;
-defm : PatShiftXrUimm<srl, "XVSRLI">;
+defm : PatShiftXrSplatUimm<srl, "XVSRLI">;
+defm : PatShiftXrUimm<loongarch_vsrli, "XVSRLI">;
// XVSRA[I]_{B/H/W/D}
defm : PatXrXr<sra, "XVSRA">;
defm : PatShiftXrXr<sra, "XVSRA">;
-defm : PatShiftXrUimm<sra, "XVSRAI">;
+defm : PatShiftXrSplatUimm<sra, "XVSRAI">;
// XVCLZ_{B/H/W/D}
defm : PatXr<ctlz, "XVCLZ">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index e37de4f545a2a..b0d880749bf92 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -58,6 +58,12 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
+def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
+def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
+
+def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>;
+def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>;
+
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
@@ -1346,7 +1352,7 @@ multiclass PatShiftVrVr<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LSX128:$vj, LSX128:$vk)>;
}
-multiclass PatShiftVrUimm<SDPatternOperator OpNode, string Inst> {
+multiclass PatShiftVrSplatUimm<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))),
(!cast<LAInst>(Inst#"_B") LSX128:$vj, uimm3:$imm)>;
def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))),
@@ -1357,6 +1363,17 @@ multiclass PatShiftVrUimm<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_D") LSX128:$vj, uimm6:$imm)>;
}
+multiclass PatShiftVrUimm<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode(v16i8 LSX128:$vj), uimm3:$imm),
+ (!cast<LAInst>(Inst#"_B") LSX128:$vj, uimm3:$imm)>;
+ def : Pat<(OpNode(v8i16 LSX128:$vj), uimm4:$imm),
+ (!cast<LAInst>(Inst#"_H") LSX128:$vj, uimm4:$imm)>;
+ def : Pat<(OpNode(v4i32 LSX128:$vj), uimm5:$imm),
+ (!cast<LAInst>(Inst#"_W") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode(v2i64 LSX128:$vj), uimm6:$imm),
+ (!cast<LAInst>(Inst#"_D") LSX128:$vj, uimm6:$imm)>;
+}
+
multiclass PatCCVrSimm5<CondCode CC, string Inst> {
def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj),
(v16i8 (SplatPat_simm5 simm5:$imm)), CC)),
@@ -1494,20 +1511,32 @@ def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
(VXORI_B LSX128:$vj, uimm8:$imm)>;
+// VBSLL_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32,
+ v2f64] in def : Pat<(loongarch_vbsll(vt LSX128:$vj), uimm5:$imm),
+ (VBSLL_V LSX128:$vj, uimm5:$imm)>;
+
+// VBSRL_V
+foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32,
+ v2f64] in def : Pat<(loongarch_vbsrl(vt LSX128:$vj), uimm5:$imm),
+ (VBSRL_V LSX128:$vj, uimm5:$imm)>;
+
// VSLL[I]_{B/H/W/D}
defm : PatVrVr<shl, "VSLL">;
defm : PatShiftVrVr<shl, "VSLL">;
-defm : PatShiftVrUimm<shl, "VSLLI">;
+defm : PatShiftVrSplatUimm<shl, "VSLLI">;
+defm : PatShiftVrUimm<loongarch_vslli, "VSLLI">;
// VSRL[I]_{B/H/W/D}
defm : PatVrVr<srl, "VSRL">;
defm : PatShiftVrVr<srl, "VSRL">;
-defm : PatShiftVrUimm<srl, "VSRLI">;
+defm : PatShiftVrSplatUimm<srl, "VSRLI">;
+defm : PatShiftVrUimm<loongarch_vsrli, "VSRLI">;
// VSRA[I]_{B/H/W/D}
defm : PatVrVr<sra, "VSRA">;
defm : PatShiftVrVr<sra, "VSRA">;
-defm : PatShiftVrUimm<sra, "VSRAI">;
+defm : PatShiftVrSplatUimm<sra, "VSRAI">;
// VCLZ_{B/H/W/D}
defm : PatVr<ctlz, "VCLZ">;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll
index e0f2659a20224..72e06f680e436 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-bit-shift.ll
@@ -4,65 +4,7 @@
define <32 x i8> @shuffle_to_xvslli_h_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 62
-; CHECK-NEXT: st.b $zero, $sp, 60
-; CHECK-NEXT: st.b $zero, $sp, 58
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 54
-; CHECK-NEXT: st.b $zero, $sp, 52
-; CHECK-NEXT: st.b $zero, $sp, 50
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 46
-; CHECK-NEXT: st.b $zero, $sp, 44
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.b $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 38
-; CHECK-NEXT: st.b $zero, $sp, 36
-; CHECK-NEXT: st.b $zero, $sp, 34
-; CHECK-NEXT: st.b $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 30
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.b $a0, $sp, 28
-; CHECK-NEXT: st.b $a0, $sp, 61
-; CHECK-NEXT: ld.b $a0, $sp, 26
-; CHECK-NEXT: st.b $a0, $sp, 59
-; CHECK-NEXT: ld.b $a0, $sp, 24
-; CHECK-NEXT: st.b $a0, $sp, 57
-; CHECK-NEXT: ld.b $a0, $sp, 22
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.b $a0, $sp, 20
-; CHECK-NEXT: st.b $a0, $sp, 53
-; CHECK-NEXT: ld.b $a0, $sp, 18
-; CHECK-NEXT: st.b $a0, $sp, 51
-; CHECK-NEXT: ld.b $a0, $sp, 16
-; CHECK-NEXT: st.b $a0, $sp, 49
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 32, i32 2, i32 32, i32 4, i32 32, i32 6, i32 32, i32 8, i32 32, i32 10, i32 32, i32 12, i32 32, i32 14, i32 32, i32 16, i32 32, i32 18, i32 32, i32 20, i32 32, i32 22, i32 32, i32 24, i32 32, i32 26, i32 32, i32 28, i32 32, i32 30>
ret <32 x i8> %shuffle
@@ -71,65 +13,7 @@ define <32 x i8> @shuffle_to_xvslli_h_8(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_h_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 61
-; CHECK-NEXT: st.b $zero, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 57
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.b $zero, $sp, 53
-; CHECK-NEXT: st.b $zero, $sp, 51
-; CHECK-NEXT: st.b $zero, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.b $zero, $sp, 45
-; CHECK-NEXT: st.b $zero, $sp, 43
-; CHECK-NEXT: st.b $zero, $sp, 41
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.b $zero, $sp, 37
-; CHECK-NEXT: st.b $zero, $sp, 35
-; CHECK-NEXT: st.b $zero, $sp, 33
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 62
-; CHECK-NEXT: ld.b $a0, $sp, 29
-; CHECK-NEXT: st.b $a0, $sp, 60
-; CHECK-NEXT: ld.b $a0, $sp, 27
-; CHECK-NEXT: st.b $a0, $sp, 58
-; CHECK-NEXT: ld.b $a0, $sp, 25
-; CHECK-NEXT: st.b $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 54
-; CHECK-NEXT: ld.b $a0, $sp, 21
-; CHECK-NEXT: st.b $a0, $sp, 52
-; CHECK-NEXT: ld.b $a0, $sp, 19
-; CHECK-NEXT: st.b $a0, $sp, 50
-; CHECK-NEXT: ld.b $a0, $sp, 17
-; CHECK-NEXT: st.b $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 32, i32 17, i32 32, i32 19, i32 32, i32 21, i32 32, i32 23, i32 32, i32 25, i32 32, i32 27, i32 32, i32 29, i32 32, i32 31, i32 32>
ret <32 x i8> %shuffle
@@ -138,65 +22,7 @@ define <32 x i8> @shuffle_to_xvsrli_h_8(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_w_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 60
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 52
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 44
-; CHECK-NEXT: st.b $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 36
-; CHECK-NEXT: st.b $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 30
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.b $a0, $sp, 26
-; CHECK-NEXT: st.b $a0, $sp, 59
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 57
-; CHECK-NEXT: ld.b $a0, $sp, 22
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 53
-; CHECK-NEXT: ld.b $a0, $sp, 18
-; CHECK-NEXT: st.b $a0, $sp, 51
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 49
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 32, i32 4, i32 5, i32 6, i32 32, i32 8, i32 9, i32 10, i32 32, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 32, i32 20, i32 21, i32 22, i32 32, i32 24, i32 25, i32 26, i32 32, i32 28, i32 29, i32 30>
ret <32 x i8> %shuffle
@@ -205,65 +31,7 @@ define <32 x i8> @shuffle_to_xvslli_w_8(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_w_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.b $zero, $sp, 51
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.b $zero, $sp, 43
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.b $zero, $sp, 35
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 62
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.b $a0, $sp, 27
-; CHECK-NEXT: st.b $a0, $sp, 58
-; CHECK-NEXT: ld.h $a0, $sp, 25
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 54
-; CHECK-NEXT: ld.h $a0, $sp, 21
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.b $a0, $sp, 19
-; CHECK-NEXT: st.b $a0, $sp, 50
-; CHECK-NEXT: ld.h $a0, $sp, 17
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 32, i32 5, i32 6, i32 7, i32 32, i32 9, i32 10, i32 11, i32 32, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 32, i32 21, i32 22, i32 23, i32 32, i32 25, i32 26, i32 27, i32 32, i32 29, i32 30, i32 31, i32 32>
ret <32 x i8> %shuffle
@@ -272,41 +40,7 @@ define <32 x i8> @shuffle_to_xvsrli_w_8(<32 x i8> %a) nounwind {
define <16 x i16> @shuffle_to_xvslli_w_16(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 58
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 54
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 50
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
ret <16 x i16> %shuffle
@@ -315,41 +49,7 @@ define <16 x i16> @shuffle_to_xvslli_w_16(<16 x i16> %a) nounwind {
define <16 x i16> @shuffle_to_xvsrli_w_16(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.h $zero, $sp, 42
-; CHECK-NEXT: st.h $zero, $sp, 38
-; CHECK-NEXT: st.h $zero, $sp, 34
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.h $a0, $sp, 26
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.h $a0, $sp, 22
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.h $a0, $sp, 18
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
ret <16 x i16> %shuffle
@@ -358,49 +58,7 @@ define <16 x i16> @shuffle_to_xvsrli_w_16(<16 x i16> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_w_24(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.b $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.b $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 46
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 38
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.b $zero, $sp, 34
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 28
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.b $a0, $sp, 24
-; CHECK-NEXT: st.b $a0, $sp, 59
-; CHECK-NEXT: ld.b $a0, $sp, 20
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.b $a0, $sp, 16
-; CHECK-NEXT: st.b $a0, $sp, 51
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 0, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 8, i32 32, i32 32, i32 32, i32 12, i32 32, i32 32, i32 32, i32 16, i32 32, i32 32, i32 32, i32 20, i32 32, i32 32, i32 32, i32 24, i32 32, i32 32, i32 32, i32 28>
ret <32 x i8> %shuffle
@@ -409,49 +67,7 @@ define <32 x i8> @shuffle_to_xvslli_w_24(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_w_24(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.b $zero, $sp, 59
-; CHECK-NEXT: st.h $zero, $sp, 57
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.h $zero, $sp, 53
-; CHECK-NEXT: st.b $zero, $sp, 51
-; CHECK-NEXT: st.h $zero, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.b $zero, $sp, 43
-; CHECK-NEXT: st.h $zero, $sp, 41
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.h $zero, $sp, 37
-; CHECK-NEXT: st.b $zero, $sp, 35
-; CHECK-NEXT: st.h $zero, $sp, 33
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 60
-; CHECK-NEXT: ld.b $a0, $sp, 27
-; CHECK-NEXT: st.b $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 52
-; CHECK-NEXT: ld.b $a0, $sp, 19
-; CHECK-NEXT: st.b $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32, i32 11, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 19, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 27, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -460,61 +76,7 @@ define <32 x i8> @shuffle_to_xvsrli_w_24(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_d_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 30
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 57
-; CHECK-NEXT: ld.b $a0, $sp, 22
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 53
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 49
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
ret <32 x i8> %shuffle
@@ -523,61 +85,7 @@ define <32 x i8> @shuffle_to_xvslli_d_8(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_d_8(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 62
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 25
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 54
-; CHECK-NEXT: ld.h $a0, $sp, 21
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 17
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 32, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
ret <32 x i8> %shuffle
@@ -586,41 +94,7 @@ define <32 x i8> @shuffle_to_xvsrli_d_8(<32 x i8> %a) nounwind {
define <16 x i16> @shuffle_to_xvslli_d_16(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 58
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 54
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 50
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 16, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 16, i32 12, i32 13, i32 14>
ret <16 x i16> %shuffle
@@ -629,41 +103,7 @@ define <16 x i16> @shuffle_to_xvslli_d_16(<16 x i16> %a) nounwind {
define <16 x i16> @shuffle_to_xvsrli_d_16(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.h $zero, $sp, 38
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 26
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.h $a0, $sp, 22
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 18
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 16, i32 13, i32 14, i32 15, i32 16>
ret <16 x i16> %shuffle
@@ -672,53 +112,7 @@ define <16 x i16> @shuffle_to_xvsrli_d_16(<16 x i16> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_d_24(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 34
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 28
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 59
-; CHECK-NEXT: ld.b $a0, $sp, 20
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 51
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <32 x i8> %shuffle
@@ -727,53 +121,7 @@ define <32 x i8> @shuffle_to_xvslli_d_24(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_d_24(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.h $zero, $sp, 53
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.h $zero, $sp, 37
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 27
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 19
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 19, i32 20, i32 21, i32 22, i32 23, i32 32, i32 32, i32 32, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -782,28 +130,7 @@ define <32 x i8> @shuffle_to_xvsrli_d_24(<32 x i8> %a) nounwind {
define <8 x i32> @shuffle_to_xvslli_d_32(<8 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 24
-; CHECK-NEXT: st.w $zero, $sp, 16
-; CHECK-NEXT: st.w $zero, $sp, 8
-; CHECK-NEXT: st.w $zero, $sp, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
-; CHECK-NEXT: st.w $a0, $sp, 28
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
-; CHECK-NEXT: st.w $a0, $sp, 20
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
-; CHECK-NEXT: st.w $a0, $sp, 12
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
-; CHECK-NEXT: st.w $a0, $sp, 4
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
ret <8 x i32> %shuffle
@@ -812,28 +139,7 @@ define <8 x i32> @shuffle_to_xvslli_d_32(<8 x i32> %a) nounwind {
define <8 x i32> @shuffle_to_xvsrli_d_32(<8 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 28
-; CHECK-NEXT: st.w $zero, $sp, 20
-; CHECK-NEXT: st.w $zero, $sp, 12
-; CHECK-NEXT: st.w $zero, $sp, 4
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: st.w $a0, $sp, 24
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
-; CHECK-NEXT: st.w $a0, $sp, 16
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
-; CHECK-NEXT: st.w $a0, $sp, 8
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
-; CHECK-NEXT: st.w $a0, $sp, 0
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 3, i32 8, i32 5, i32 8, i32 7, i32 8>
ret <8 x i32> %shuffle
@@ -842,45 +148,7 @@ define <8 x i32> @shuffle_to_xvsrli_d_32(<8 x i32> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_d_40(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 26
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.b $a0, $sp, 18
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 53
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26>
ret <32 x i8> %shuffle
@@ -889,45 +157,7 @@ define <32 x i8> @shuffle_to_xvslli_d_40(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_d_40(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.w $zero, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.w $zero, $sp, 51
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.w $zero, $sp, 43
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.w $zero, $sp, 35
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 58
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 50
-; CHECK-NEXT: ld.h $a0, $sp, 21
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 21, i32 22, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -936,33 +166,7 @@ define <32 x i8> @shuffle_to_xvsrli_d_40(<32 x i8> %a) nounwind {
define <16 x i16> @shuffle_to_xvslli_d_48(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 54
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
ret <16 x i16> %shuffle
@@ -971,33 +175,7 @@ define <16 x i16> @shuffle_to_xvslli_d_48(<16 x i16> %a) nounwind {
define <16 x i16> @shuffle_to_xvsrli_d_48(<16 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.w $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 54
-; CHECK-NEXT: st.w $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.w $zero, $sp, 42
-; CHECK-NEXT: st.h $zero, $sp, 38
-; CHECK-NEXT: st.w $zero, $sp, 34
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.h $a0, $sp, 22
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 3, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -1006,37 +184,7 @@ define <16 x i16> @shuffle_to_xvsrli_d_48(<16 x i16> %a) nounwind {
define <32 x i8> @shuffle_to_xvslli_d_56(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvslli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.b $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 46
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 38
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 24
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.b $a0, $sp, 16
-; CHECK-NEXT: st.b $a0, $sp, 55
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24>
ret <32 x i8> %shuffle
@@ -1045,39 +193,8 @@ define <32 x i8> @shuffle_to_xvslli_d_56(<32 x i8> %a) nounwind {
define <32 x i8> @shuffle_to_xvsrli_d_56(<32 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_xvsrli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.w $zero, $sp, 57
-; CHECK-NEXT: st.b $zero, $sp, 55
-; CHECK-NEXT: st.h $zero, $sp, 53
-; CHECK-NEXT: st.w $zero, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.w $zero, $sp, 41
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.h $zero, $sp, 37
-; CHECK-NEXT: st.w $zero, $sp, 33
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 56
-; CHECK-NEXT: ld.b $a0, $sp, 23
-; CHECK-NEXT: st.b $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
}
-
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll
index 111f09406ab7c..15bfce902f9dd 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-shift.ll
@@ -4,61 +4,7 @@
define <32 x i8> @shuffle_32i8_byte_left_shift_1(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 30
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 57
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 49
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
ret <32 x i8> %shuffle
@@ -67,57 +13,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_1(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_2(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 58
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 50
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
ret <32 x i8> %shuffle
@@ -126,57 +22,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_2(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_3(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 34
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 28
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 59
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 51
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <32 x i8> %shuffle
@@ -185,51 +31,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_3(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_4(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 60
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 52
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
ret <32 x i8> %shuffle
@@ -238,53 +40,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_4(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_5(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 26
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 53
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
ret <32 x i8> %shuffle
@@ -293,49 +49,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_5(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_6(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 54
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25>
ret <32 x i8> %shuffle
@@ -344,49 +58,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_6(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_7(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 38
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 24
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 55
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24>
ret <32 x i8> %shuffle
@@ -395,41 +67,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_7(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_8(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 56
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <32 x i8> %shuffle
@@ -438,45 +76,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_8(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_9(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 22
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 57
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
ret <32 x i8> %shuffle
@@ -485,41 +85,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_9(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_10(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 58
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
ret <32 x i8> %shuffle
@@ -528,41 +94,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_10(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_11(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 20
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 59
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 4, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 20>
ret <32 x i8> %shuffle
@@ -571,35 +103,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_11(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_12(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 60
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19>
ret <32 x i8> %shuffle
@@ -608,37 +112,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_12(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_13(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 18
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 61
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18>
ret <32 x i8> %shuffle
@@ -647,33 +121,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_13(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_14(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17>
ret <32 x i8> %shuffle
@@ -682,33 +130,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_14(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_left_shift_15(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_left_shift_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.b $zero, $sp, 46
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.b $a0, $sp, 16
-; CHECK-NEXT: st.b $a0, $sp, 63
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 0
-; CHECK-NEXT: st.b $a0, $sp, 47
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16>
ret <32 x i8> %shuffle
@@ -717,43 +139,7 @@ define <32 x i8> @shuffle_32i8_byte_left_shift_15(<32 x i8> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_2(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 28
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 58
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 50
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i16> %shuffle
@@ -762,39 +148,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_2(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_4(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.w $a0, $sp, 24
-; CHECK-NEXT: st.w $a0, $sp, 60
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 52
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
ret <16 x i16> %shuffle
@@ -803,39 +157,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_4(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_6(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 36
-; CHECK-NEXT: st.w $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 24
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 54
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i16> %shuffle
@@ -844,33 +166,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_6(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_8(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.d $a0, $sp, 16
-; CHECK-NEXT: st.d $a0, $sp, 56
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11>
ret <16 x i16> %shuffle
@@ -879,35 +175,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_8(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_10(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 20
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 58
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
@@ -916,31 +184,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_10(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_12(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.w $a0, $sp, 16
-; CHECK-NEXT: st.w $a0, $sp, 60
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9>
ret <16 x i16> %shuffle
@@ -949,31 +193,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_12(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_left_shift_14(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_left_shift_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 48
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.w $zero, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: ld.h $a0, $sp, 16
-; CHECK-NEXT: st.h $a0, $sp, 62
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $sp, 46
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
ret <16 x i16> %shuffle
@@ -982,34 +202,7 @@ define <16 x i16> @shuffle_16i16_byte_left_shift_14(<16 x i16> %a) {
define <8 x i32> @shuffle_8i32_byte_left_shift_4(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_left_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 16
-; CHECK-NEXT: st.w $zero, $sp, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
-; CHECK-NEXT: st.w $a0, $sp, 28
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
-; CHECK-NEXT: st.w $a0, $sp, 24
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
-; CHECK-NEXT: st.w $a0, $sp, 20
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
-; CHECK-NEXT: st.w $a0, $sp, 12
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
-; CHECK-NEXT: st.w $a0, $sp, 8
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
-; CHECK-NEXT: st.w $a0, $sp, 4
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6>
ret <8 x i32> %shuffle
@@ -1018,30 +211,7 @@ define <8 x i32> @shuffle_8i32_byte_left_shift_4(<8 x i32> %a) {
define <8 x i32> @shuffle_8i32_byte_left_shift_8(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_left_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 16
-; CHECK-NEXT: st.d $zero, $sp, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
-; CHECK-NEXT: st.w $a0, $sp, 28
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
-; CHECK-NEXT: st.w $a0, $sp, 24
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
-; CHECK-NEXT: st.w $a0, $sp, 12
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
-; CHECK-NEXT: st.w $a0, $sp, 8
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 0, i32 1, i32 8, i32 8, i32 4, i32 5>
ret <8 x i32> %shuffle
@@ -1050,28 +220,7 @@ define <8 x i32> @shuffle_8i32_byte_left_shift_8(<8 x i32> %a) {
define <8 x i32> @shuffle_8i32_byte_left_shift_12(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_left_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 24
-; CHECK-NEXT: st.d $zero, $sp, 16
-; CHECK-NEXT: st.w $zero, $sp, 8
-; CHECK-NEXT: st.d $zero, $sp, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 4
-; CHECK-NEXT: st.w $a0, $sp, 28
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 0
-; CHECK-NEXT: st.w $a0, $sp, 12
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
ret <8 x i32> %shuffle
@@ -1080,26 +229,7 @@ define <8 x i32> @shuffle_8i32_byte_left_shift_12(<8 x i32> %a) {
define <4 x i64> @shuffle_4i64_byte_left_shift_8(<4 x i64> %a) {
; CHECK-LABEL: shuffle_4i64_byte_left_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 16
-; CHECK-NEXT: st.d $zero, $sp, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 2
-; CHECK-NEXT: st.d $a0, $sp, 24
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT: st.d $a0, $sp, 8
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 2>
ret <4 x i64> %shuffle
@@ -1108,61 +238,7 @@ define <4 x i64> @shuffle_4i64_byte_left_shift_8(<4 x i64> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_1(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 62
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 25
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 17
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 46
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32>
ret <32 x i8> %shuffle
@@ -1171,57 +247,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_1(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_2(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 26
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 18
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 45
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 2
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1230,57 +256,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_2(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_3(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 27
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 19
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 3
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1289,51 +265,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_3(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_4(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: ld.w $a0, $sp, 28
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 20
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 43
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 4
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1342,53 +274,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_4(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_5(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.w $zero, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.w $zero, $sp, 43
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 58
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 21
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 5
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1397,49 +283,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_5(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_6(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.w $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.w $zero, $sp, 42
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 22
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 41
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 6
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1448,49 +292,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_6(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_7(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.w $zero, $sp, 57
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.w $zero, $sp, 41
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 23
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 7
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1499,41 +301,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_7(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_8(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 40
-; CHECK-NEXT: ld.d $a0, $sp, 24
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 39
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 8
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1542,45 +310,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_8(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_9(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.d $zero, $sp, 55
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.d $zero, $sp, 39
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 54
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 25
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 9
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1589,41 +319,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_9(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_10(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.d $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.d $zero, $sp, 38
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 26
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 37
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 10
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1632,41 +328,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_10(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_11(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.d $zero, $sp, 53
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.d $zero, $sp, 37
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 27
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 11
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1675,35 +337,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_11(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_12(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.d $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: st.d $zero, $sp, 36
-; CHECK-NEXT: ld.w $a0, $sp, 28
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 35
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 12
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 12, i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 28, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1712,37 +346,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_12(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_13(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.w $zero, $sp, 59
-; CHECK-NEXT: st.d $zero, $sp, 51
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.w $zero, $sp, 43
-; CHECK-NEXT: st.d $zero, $sp, 35
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 50
-; CHECK-NEXT: ld.h $a0, $sp, 29
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 13
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 29, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1751,33 +355,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_13(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_14(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.w $zero, $sp, 58
-; CHECK-NEXT: st.d $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.w $zero, $sp, 42
-; CHECK-NEXT: st.d $zero, $sp, 34
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 33
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 14
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 14, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 30, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1786,33 +364,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_14(<32 x i8> %a) {
define <32 x i8> @shuffle_32i8_byte_right_shift_15(<32 x i8> %a) {
; CHECK-LABEL: shuffle_32i8_byte_right_shift_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.b $zero, $sp, 63
-; CHECK-NEXT: st.h $zero, $sp, 61
-; CHECK-NEXT: st.w $zero, $sp, 57
-; CHECK-NEXT: st.d $zero, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 45
-; CHECK-NEXT: st.w $zero, $sp, 41
-; CHECK-NEXT: st.d $zero, $sp, 33
-; CHECK-NEXT: ld.b $a0, $sp, 31
-; CHECK-NEXT: st.b $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT: st.b $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 31, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
ret <32 x i8> %shuffle
@@ -1821,43 +373,7 @@ define <32 x i8> @shuffle_32i8_byte_right_shift_15(<32 x i8> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_2(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 60
-; CHECK-NEXT: ld.w $a0, $sp, 26
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 18
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 44
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i16> %shuffle
@@ -1866,39 +382,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_2(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_4(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: ld.w $a0, $sp, 28
-; CHECK-NEXT: st.w $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 20
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 42
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 2
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -1907,39 +391,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_4(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_6(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.w $zero, $sp, 58
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.w $zero, $sp, 42
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 56
-; CHECK-NEXT: ld.d $a0, $sp, 22
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 40
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 3
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -1948,33 +400,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_6(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_8(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: st.d $zero, $sp, 40
-; CHECK-NEXT: ld.d $a0, $sp, 24
-; CHECK-NEXT: st.d $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 38
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 4
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -1983,35 +409,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_8(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_10(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.d $zero, $sp, 54
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.d $zero, $sp, 38
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 52
-; CHECK-NEXT: ld.w $a0, $sp, 26
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 36
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 5
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -2020,31 +418,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_10(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_12(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.d $zero, $sp, 52
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: st.d $zero, $sp, 36
-; CHECK-NEXT: ld.w $a0, $sp, 28
-; CHECK-NEXT: st.w $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 34
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 6
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -2053,31 +427,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_12(<16 x i16> %a) {
define <16 x i16> @shuffle_16i16_byte_right_shift_14(<16 x i16> %a) {
; CHECK-LABEL: shuffle_16i16_byte_right_shift_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: .cfi_def_cfa_offset 96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: st.h $zero, $sp, 62
-; CHECK-NEXT: st.w $zero, $sp, 58
-; CHECK-NEXT: st.d $zero, $sp, 50
-; CHECK-NEXT: st.h $zero, $sp, 46
-; CHECK-NEXT: st.w $zero, $sp, 42
-; CHECK-NEXT: st.d $zero, $sp, 34
-; CHECK-NEXT: ld.h $a0, $sp, 30
-; CHECK-NEXT: st.h $a0, $sp, 48
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT: st.h $a0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i16> %shuffle
@@ -2086,34 +436,7 @@ define <16 x i16> @shuffle_16i16_byte_right_shift_14(<16 x i16> %a) {
define <8 x i32> @shuffle_8i32_byte_right_shift_4(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_right_shift_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 28
-; CHECK-NEXT: st.w $zero, $sp, 12
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: st.w $a0, $sp, 24
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
-; CHECK-NEXT: st.w $a0, $sp, 20
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 5
-; CHECK-NEXT: st.w $a0, $sp, 16
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
-; CHECK-NEXT: st.w $a0, $sp, 8
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
-; CHECK-NEXT: st.w $a0, $sp, 4
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
-; CHECK-NEXT: st.w $a0, $sp, 0
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
ret <8 x i32> %shuffle
@@ -2122,30 +445,7 @@ define <8 x i32> @shuffle_8i32_byte_right_shift_4(<8 x i32> %a) {
define <8 x i32> @shuffle_8i32_byte_right_shift_8(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_right_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 24
-; CHECK-NEXT: st.d $zero, $sp, 8
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: st.w $a0, $sp, 20
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 6
-; CHECK-NEXT: st.w $a0, $sp, 16
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
-; CHECK-NEXT: st.w $a0, $sp, 4
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 2
-; CHECK-NEXT: st.w $a0, $sp, 0
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 8, i32 8, i32 6, i32 7, i32 8, i32 8>
ret <8 x i32> %shuffle
@@ -2154,28 +454,7 @@ define <8 x i32> @shuffle_8i32_byte_right_shift_8(<8 x i32> %a) {
define <8 x i32> @shuffle_8i32_byte_right_shift_12(<8 x i32> %a) {
; CHECK-LABEL: shuffle_8i32_byte_right_shift_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 28
-; CHECK-NEXT: st.d $zero, $sp, 20
-; CHECK-NEXT: st.w $zero, $sp, 12
-; CHECK-NEXT: st.d $zero, $sp, 4
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: st.w $a0, $sp, 16
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 3
-; CHECK-NEXT: st.w $a0, $sp, 0
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
ret <8 x i32> %shuffle
@@ -2184,28 +463,8 @@ define <8 x i32> @shuffle_8i32_byte_right_shift_12(<8 x i32> %a) {
define <4 x i64> @shuffle_4i64_byte_right_shift_8(<4 x i64> %a) {
; CHECK-LABEL: shuffle_4i64_byte_right_shift_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi.d $sp, $sp, -64
-; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
-; CHECK-NEXT: .cfi_offset 1, -8
-; CHECK-NEXT: .cfi_offset 22, -16
-; CHECK-NEXT: addi.d $fp, $sp, 64
-; CHECK-NEXT: .cfi_def_cfa 22, 0
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 24
-; CHECK-NEXT: st.d $zero, $sp, 8
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: st.d $a0, $sp, 16
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: st.d $a0, $sp, 0
-; CHECK-NEXT: xvld $xr0, $sp, 0
-; CHECK-NEXT: addi.d $sp, $fp, -64
-; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
ret <4 x i64> %shuffle
}
-
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index 984b6f3d74866..d84e408cd28be 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -374,11 +374,8 @@ define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
index b590103511847..48f18a35a38c4 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
@@ -4,10 +4,7 @@
define <16 x i8> @shuffle_to_vslli_h_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.h $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
ret <16 x i8> %shuffle
@@ -16,10 +13,7 @@ define <16 x i8> @shuffle_to_vslli_h_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_h_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -28,10 +22,7 @@ define <16 x i8> @shuffle_to_vsrli_h_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_w_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.w $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 1, i32 2, i32 16, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 16, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -40,10 +31,7 @@ define <16 x i8> @shuffle_to_vslli_w_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_w_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 16, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -52,11 +40,7 @@ define <16 x i8> @shuffle_to_vsrli_w_8(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_w_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
ret <8 x i16> %shuffle
@@ -65,11 +49,7 @@ define <8 x i16> @shuffle_to_vslli_w_16(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_w_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 5, i32 8, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -78,10 +58,7 @@ define <8 x i16> @shuffle_to_vsrli_w_16(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_w_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.w $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
ret <16 x i8> %shuffle
@@ -90,10 +67,7 @@ define <16 x i8> @shuffle_to_vslli_w_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_w_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 3, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -102,10 +76,7 @@ define <16 x i8> @shuffle_to_vsrli_w_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -114,10 +85,7 @@ define <16 x i8> @shuffle_to_vslli_d_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -126,11 +94,7 @@ define <16 x i8> @shuffle_to_vsrli_d_8(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_d_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6>
ret <8 x i16> %shuffle
@@ -139,11 +103,7 @@ define <8 x i16> @shuffle_to_vslli_d_16(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_d_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -152,10 +112,7 @@ define <8 x i16> @shuffle_to_vsrli_d_16(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i8> %shuffle
@@ -164,10 +121,7 @@ define <16 x i8> @shuffle_to_vslli_d_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -176,11 +130,7 @@ define <16 x i8> @shuffle_to_vsrli_d_24(<16 x i8> %a) nounwind {
define <4 x i32> @shuffle_to_vslli_d_32(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 poison>
ret <4 x i32> %shuffle
@@ -189,11 +139,7 @@ define <4 x i32> @shuffle_to_vslli_d_32(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_to_vsrli_d_32(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -202,10 +148,7 @@ define <4 x i32> @shuffle_to_vsrli_d_32(<4 x i32> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_40(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -214,10 +157,7 @@ define <16 x i8> @shuffle_to_vslli_d_40(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_40(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -226,11 +166,7 @@ define <16 x i8> @shuffle_to_vsrli_d_40(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_d_48(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
ret <8 x i16> %shuffle
@@ -239,11 +175,7 @@ define <8 x i16> @shuffle_to_vslli_d_48(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_d_48(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI19_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -252,10 +184,7 @@ define <8 x i16> @shuffle_to_vsrli_d_48(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_56(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI20_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
ret <16 x i8> %shuffle
@@ -264,10 +193,7 @@ define <16 x i8> @shuffle_to_vslli_d_56(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_56(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI21_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 8156239f81963..720fe919601e6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -4,10 +4,7 @@
define <16 x i8> @shuffle_16i8_vbsll_v_1(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -16,10 +13,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_1(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_2(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
ret <16 x i8> %shuffle
@@ -28,10 +22,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_2(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i8> %shuffle
@@ -40,10 +31,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_3(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_4(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <16 x i8> %shuffle
@@ -52,10 +40,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_4(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_5(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -64,10 +49,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_5(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_6(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <16 x i8> %shuffle
@@ -76,10 +58,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_6(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_7(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <16 x i8> %shuffle
@@ -88,10 +67,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_7(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i8> %shuffle
@@ -100,10 +76,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_9(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %shuffle
@@ -112,10 +85,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_9(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_10(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <16 x i8> %shuffle
@@ -124,10 +94,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_10(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_11(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <16 x i8> %shuffle
@@ -136,10 +103,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_11(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_12(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3>
ret <16 x i8> %shuffle
@@ -148,10 +112,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_12(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_13(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2>
ret <16 x i8> %shuffle
@@ -160,10 +121,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_13(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_14(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1>
ret <16 x i8> %shuffle
@@ -172,10 +130,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_14(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_15(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0>
ret <16 x i8> %shuffle
@@ -184,11 +139,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_15(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_2(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <8 x i16> %shuffle
@@ -197,11 +148,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_2(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_4(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <8 x i16> %shuffle
@@ -210,11 +157,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_4(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_6(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -223,11 +166,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_6(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_8(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %shuffle
@@ -236,11 +175,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_8(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_10(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI19_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle
@@ -249,11 +184,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_10(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_12(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI20_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 1>
ret <8 x i16> %shuffle
@@ -262,11 +193,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_12(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_14(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI21_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0>
ret <8 x i16> %shuffle
@@ -275,11 +202,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_14(<8 x i16> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_4(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI22_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI22_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -288,11 +211,7 @@ define <4 x i32> @shuffle_4i32_vbsll_v_4(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_8(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI23_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI23_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 0, i32 1>
ret <4 x i32> %shuffle
@@ -301,11 +220,7 @@ define <4 x i32> @shuffle_4i32_vbsll_v_8(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_12(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 0>
ret <4 x i32> %shuffle
@@ -324,10 +239,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_1(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI26_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI26_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -336,10 +248,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_1(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_2(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI27_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI27_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -348,10 +257,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_2(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI28_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI28_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -360,10 +266,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_3(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_4(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI29_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -372,10 +275,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_4(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_5(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI30_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI30_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -384,10 +284,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_5(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_6(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI31_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI31_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -396,10 +293,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_6(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_7(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI32_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI32_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -408,10 +302,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_7(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI33_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI33_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -420,10 +311,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_9(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI34_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI34_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -432,10 +320,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_9(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_10(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI35_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI35_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -444,10 +329,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_10(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_11(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI36_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI36_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -456,10 +338,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_11(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_12(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI37_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -468,10 +347,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_12(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_13(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI38_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI38_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -480,10 +356,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_13(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_14(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI39_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI39_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -492,10 +365,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_14(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_15(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI40_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI40_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -504,11 +374,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_15(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_2(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI41_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI41_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -517,11 +383,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_2(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_4(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI42_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI42_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -530,11 +392,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_4(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_6(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI43_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI43_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -543,11 +401,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_6(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_8(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI44_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI44_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -556,11 +410,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_8(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_10(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI45_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI45_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -569,11 +419,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_10(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_12(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI46_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI46_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -582,11 +428,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_12(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_14(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI47_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI47_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -595,11 +437,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_14(<8 x i16> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_4(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI48_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI48_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -608,11 +446,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_4(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_8(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI49_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI49_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
ret <4 x i32> %shuffle
@@ -621,11 +455,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_8(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI50_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI50_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
ret <4 x i32> %shuffle
@@ -634,11 +464,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI51_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI51_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.d $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
More information about the llvm-commits
mailing list