[llvm] [LoongArch] lower vector shuffle to shift (PR #132866)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 19:49:41 PDT 2025
https://github.com/tangaac created https://github.com/llvm/llvm-project/pull/132866
None
>From 2593e80e93aaf86d8e0407504426b57169a16300 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 25 Mar 2025 09:52:14 +0800
Subject: [PATCH 1/2] lower vector shuffle to shift
---
.../LoongArch/LoongArchISelLowering.cpp | 135 ++++++++-
.../Target/LoongArch/LoongArchISelLowering.h | 10 +-
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 50 ++++
.../CodeGen/LoongArch/lsx/build-vector.ll | 7 +-
.../LoongArch/lsx/vec-shuffle-bit-shift.ll | 118 ++------
.../LoongArch/lsx/vec-shuffle-byte-shift.ll | 276 ++++--------------
6 files changed, 263 insertions(+), 333 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 8d80a1ba55bcb..63a7df8565c87 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -519,6 +519,121 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
}
}
+/// Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
+/// instructions.
+// The function matches elements from one of the input vectors shuffled to the
+// left or right with zeroable elements 'shifted in'. It handles both the
+// strictly bit-wise element shifts and the byte shift across an entire 128-bit
+// lane.
+// This is mainly copied from X86.
+static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
+ unsigned ScalarSizeInBits, ArrayRef<int> Mask,
+ int MaskOffset, const APInt &Zeroable) {
+ int Size = Mask.size();
+ unsigned SizeInBits = Size * ScalarSizeInBits;
+
+ auto CheckZeros = [&](int Shift, int Scale, bool Left) {
+ for (int i = 0; i < Size; i += Scale)
+ for (int j = 0; j < Shift; ++j)
+ if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
+ return false;
+
+ return true;
+ };
+
+ auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size, int Low,
+ int Step = 1) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
+ if (!(Mask[i] == -1 || Mask[i] == Low))
+ return false;
+ return true;
+ };
+
+ auto MatchShift = [&](int Shift, int Scale, bool Left) {
+ for (int i = 0; i != Size; i += Scale) {
+ unsigned Pos = Left ? i + Shift : i;
+ unsigned Low = Left ? i : i + Shift;
+ unsigned Len = Scale - Shift;
+ if (!isSequentialOrUndefInRange(Pos, Len, Low + MaskOffset))
+ return -1;
+ }
+
+ int ShiftEltBits = ScalarSizeInBits * Scale;
+ bool ByteShift = ShiftEltBits > 64;
+ Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
+ : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
+ int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
+
+ // Normalize the scale for byte shifts to still produce an i64 element
+ // type.
+ Scale = ByteShift ? Scale / 2 : Scale;
+
+ // We need to round trip through the appropriate type for the shift.
+ MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
+ ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
+ : MVT::getVectorVT(ShiftSVT, Size / Scale);
+ return (int)ShiftAmt;
+ };
+
+ unsigned MaxWidth = 128;
+ for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
+ for (int Shift = 1; Shift != Scale; ++Shift)
+ for (bool Left : {true, false})
+ if (CheckZeros(Shift, Scale, Left)) {
+ int ShiftAmt = MatchShift(Shift, Scale, Left);
+ if (0 < ShiftAmt)
+ return ShiftAmt;
+ }
+
+ // no match
+ return -1;
+}
+
+/// Lower VECTOR_SHUFFLE as shift (if possible).
+///
+/// For example:
+/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
+/// <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+/// is lowered to:
+/// (VBSLL_V $v0, $v0, 4)
+///
+/// %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
+/// <4 x i32> <i32 4, i32 0, i32 4, i32 2>
+/// is lowered to:
+/// (VSLLI_D $v0, $v0, 32)
+static SDValue lowerVECTOR_SHUFFLEAsShift(const SDLoc &DL, ArrayRef<int> Mask,
+ MVT VT, SDValue V1, SDValue V2,
+ SelectionDAG &DAG,
+ const APInt &Zeroable) {
+ int Size = Mask.size();
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ MVT ShiftVT;
+ SDValue V = V1;
+ unsigned Opcode;
+
+ // Try to match shuffle against V1 shift.
+ int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, 0, Zeroable);
+
+ // If V1 failed, try to match shuffle against V2 shift.
+ if (ShiftAmt < 0) {
+ ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, Size, Zeroable);
+ V = V2;
+ }
+
+ if (ShiftAmt < 0)
+ return SDValue();
+
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
+ "Illegal integer vector type");
+ V = DAG.getBitcast(ShiftVT, V);
+ V = DAG.getNode(Opcode, DL, ShiftVT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i64));
+ return DAG.getBitcast(VT, V);
+}
+
/// Determine whether a range fits a regular pattern of values.
/// This function accounts for the possibility of jumping over the End iterator.
template <typename ValType>
@@ -587,14 +702,12 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
ArrayRef<int> Mask, MVT VT,
SDValue V1, SDValue V2,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ const APInt &Zeroable) {
int Bits = VT.getSizeInBits();
int EltBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
- APInt KnownUndef, KnownZero;
- computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
- APInt Zeroable = KnownUndef | KnownZero;
if (Zeroable.isAllOnes())
return DAG.getConstant(0, DL, VT);
@@ -1056,6 +1169,10 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
"Unexpected mask size for shuffle!");
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+ APInt KnownUndef, KnownZero;
+ computeZeroableShuffleElements(Mask, V1, V2, KnownUndef, KnownZero);
+ APInt Zeroable = KnownUndef | KnownZero;
+
SDValue Result;
// TODO: Add more comparison patterns.
if (V2.isUndef()) {
@@ -1083,12 +1200,14 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
+ Zeroable)))
+ return Result;
if ((Result =
- lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG)))
+ lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
return Result;
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
return Result;
-
return SDValue();
}
@@ -4997,6 +5116,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VANY_NONZERO)
NODE_NAME_CASE(FRECIPE)
NODE_NAME_CASE(FRSQRTE)
+ NODE_NAME_CASE(VSLLI)
+ NODE_NAME_CASE(VSRLI)
+ NODE_NAME_CASE(VBSLL)
+ NODE_NAME_CASE(VBSRL)
}
#undef NODE_NAME_CASE
return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 002fad0e20759..52d88b9b24a6b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -147,7 +147,15 @@ enum NodeType : unsigned {
// Floating point approximate reciprocal operation
FRECIPE,
- FRSQRTE
+ FRSQRTE,
+
+ // Vector logical left / right shift by immediate
+ VSLLI,
+ VSRLI,
+
+ // Vector byte logical left / right shift
+ VBSLL,
+ VBSRL
// Intrinsic operations end =============================================
};
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d2063a8aaae9b..ecbcd29d88aac 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -58,6 +58,12 @@ def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplg
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
+def loongarch_vslli : SDNode<"LoongArchISD::VSLLI", SDT_LoongArchV1RUimm>;
+def loongarch_vsrli : SDNode<"LoongArchISD::VSRLI", SDT_LoongArchV1RUimm>;
+
+def loongarch_vbsll : SDNode<"LoongArchISD::VBSLL", SDT_LoongArchV1RUimm>;
+def loongarch_vbsrl : SDNode<"LoongArchISD::VBSRL", SDT_LoongArchV1RUimm>;
+
def immZExt1 : ImmLeaf<i64, [{return isUInt<1>(Imm);}]>;
def immZExt2 : ImmLeaf<i64, [{return isUInt<2>(Imm);}]>;
def immZExt3 : ImmLeaf<i64, [{return isUInt<3>(Imm);}]>;
@@ -1494,15 +1500,59 @@ def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
(VXORI_B LSX128:$vj, uimm8:$imm)>;
+// VBSLL_V
+def : Pat<(loongarch_vbsll v16i8:$vj, uimm5:$imm), (VBSLL_V v16i8:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsll v8i16:$vj, uimm5:$imm), (VBSLL_V v8i16:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsll v4i32:$vj, uimm5:$imm), (VBSLL_V v4i32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsll v2i64:$vj, uimm5:$imm), (VBSLL_V v2i64:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsll v4f32:$vj, uimm5:$imm), (VBSLL_V v4f32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsll v2f64:$vj, uimm5:$imm), (VBSLL_V v2f64:$vj,
+ uimm5:$imm)>;
+
+// VBSRL_V
+def : Pat<(loongarch_vbsrl v16i8:$vj, uimm5:$imm), (VBSRL_V v16i8:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsrl v8i16:$vj, uimm5:$imm), (VBSRL_V v8i16:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsrl v4i32:$vj, uimm5:$imm), (VBSRL_V v4i32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsrl v2i64:$vj, uimm5:$imm), (VBSRL_V v2i64:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsrl v4f32:$vj, uimm5:$imm), (VBSRL_V v4f32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vbsrl v2f64:$vj, uimm5:$imm), (VBSRL_V v2f64:$vj,
+ uimm5:$imm)>;
+
// VSLL[I]_{B/H/W/D}
defm : PatVrVr<shl, "VSLL">;
defm : PatShiftVrVr<shl, "VSLL">;
defm : PatShiftVrUimm<shl, "VSLLI">;
+def : Pat<(loongarch_vslli v16i8:$vj, uimm3:$imm), (VSLLI_B v16i8:$vj,
+ uimm3:$imm)>;
+def : Pat<(loongarch_vslli v8i16:$vj, uimm4:$imm), (VSLLI_H v8i16:$vj,
+ uimm4:$imm)>;
+def : Pat<(loongarch_vslli v4i32:$vj, uimm5:$imm), (VSLLI_W v4i32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vslli v2i64:$vj, uimm6:$imm), (VSLLI_D v2i64:$vj,
+ uimm6:$imm)>;
// VSRL[I]_{B/H/W/D}
defm : PatVrVr<srl, "VSRL">;
defm : PatShiftVrVr<srl, "VSRL">;
defm : PatShiftVrUimm<srl, "VSRLI">;
+def : Pat<(loongarch_vsrli v16i8:$vj, uimm3:$imm), (VSRLI_B v16i8:$vj,
+ uimm3:$imm)>;
+def : Pat<(loongarch_vsrli v8i16:$vj, uimm4:$imm), (VSRLI_H v8i16:$vj,
+ uimm4:$imm)>;
+def : Pat<(loongarch_vsrli v4i32:$vj, uimm5:$imm), (VSRLI_W v4i32:$vj,
+ uimm5:$imm)>;
+def : Pat<(loongarch_vsrli v2i64:$vj, uimm6:$imm), (VSRLI_D v2i64:$vj,
+ uimm6:$imm)>;
// VSRA[I]_{B/H/W/D}
defm : PatVrVr<sra, "VSRA">;
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index 984b6f3d74866..d84e408cd28be 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -374,11 +374,8 @@ define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vst $vr1, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 32
+; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
index b590103511847..48f18a35a38c4 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-bit-shift.ll
@@ -4,10 +4,7 @@
define <16 x i8> @shuffle_to_vslli_h_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.h $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 16, i32 2, i32 16, i32 4, i32 16, i32 6, i32 16, i32 8, i32 16, i32 10, i32 16, i32 12, i32 16, i32 14>
ret <16 x i8> %shuffle
@@ -16,10 +13,7 @@ define <16 x i8> @shuffle_to_vslli_h_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_h_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_h_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -28,10 +22,7 @@ define <16 x i8> @shuffle_to_vsrli_h_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_w_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.w $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 1, i32 2, i32 16, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 16, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -40,10 +31,7 @@ define <16 x i8> @shuffle_to_vslli_w_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_w_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 2, i32 3, i32 16, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 16, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -52,11 +40,7 @@ define <16 x i8> @shuffle_to_vsrli_w_8(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_w_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.w $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6>
ret <8 x i16> %shuffle
@@ -65,11 +49,7 @@ define <8 x i16> @shuffle_to_vslli_w_16(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_w_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 8, i32 5, i32 8, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -78,10 +58,7 @@ define <8 x i16> @shuffle_to_vsrli_w_16(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_w_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.w $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 12>
ret <16 x i8> %shuffle
@@ -90,10 +67,7 @@ define <16 x i8> @shuffle_to_vslli_w_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_w_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_w_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 3, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -102,10 +76,7 @@ define <16 x i8> @shuffle_to_vsrli_w_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -114,10 +85,7 @@ define <16 x i8> @shuffle_to_vslli_d_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -126,11 +94,7 @@ define <16 x i8> @shuffle_to_vsrli_d_8(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_d_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 0, i32 1, i32 2, i32 8, i32 4, i32 5, i32 6>
ret <8 x i16> %shuffle
@@ -139,11 +103,7 @@ define <8 x i16> @shuffle_to_vslli_d_16(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_d_16(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_16:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 16
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -152,10 +112,7 @@ define <8 x i16> @shuffle_to_vsrli_d_16(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i8> %shuffle
@@ -164,10 +121,7 @@ define <16 x i8> @shuffle_to_vslli_d_24(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_24(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_24:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 24
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -176,11 +130,7 @@ define <16 x i8> @shuffle_to_vsrli_d_24(<16 x i8> %a) nounwind {
define <4 x i32> @shuffle_to_vslli_d_32(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 4, i32 poison>
ret <4 x i32> %shuffle
@@ -189,11 +139,7 @@ define <4 x i32> @shuffle_to_vslli_d_32(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_to_vsrli_d_32(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_32:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 32
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 4, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -202,10 +148,7 @@ define <4 x i32> @shuffle_to_vsrli_d_32(<4 x i32> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_40(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -214,10 +157,7 @@ define <16 x i8> @shuffle_to_vslli_d_40(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_40(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_40:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 40
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -226,11 +166,7 @@ define <16 x i8> @shuffle_to_vsrli_d_40(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_to_vslli_d_48(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vslli.d $vr0, $vr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 8, i32 8, i32 8, i32 0, i32 8, i32 8, i32 8, i32 4>
ret <8 x i16> %shuffle
@@ -239,11 +175,7 @@ define <8 x i16> @shuffle_to_vslli_d_48(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_to_vsrli_d_48(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_48:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI19_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 48
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32><i32 3, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -252,10 +184,7 @@ define <8 x i16> @shuffle_to_vsrli_d_48(<8 x i16> %a) nounwind {
define <16 x i8> @shuffle_to_vslli_d_56(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vslli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI20_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vslli.d $vr0, $vr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8>
ret <16 x i8> %shuffle
@@ -264,10 +193,7 @@ define <16 x i8> @shuffle_to_vslli_d_56(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_to_vsrli_d_56(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_to_vsrli_d_56:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI21_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 56
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32><i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 8156239f81963..720fe919601e6 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -4,10 +4,7 @@
define <16 x i8> @shuffle_16i8_vbsll_v_1(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
ret <16 x i8> %shuffle
@@ -16,10 +13,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_1(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_2(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
ret <16 x i8> %shuffle
@@ -28,10 +22,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_2(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i8> %shuffle
@@ -40,10 +31,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_3(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_4(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <16 x i8> %shuffle
@@ -52,10 +40,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_4(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_5(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -64,10 +49,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_5(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_6(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
ret <16 x i8> %shuffle
@@ -76,10 +58,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_6(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_7(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <16 x i8> %shuffle
@@ -88,10 +67,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_7(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i8> %shuffle
@@ -100,10 +76,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_9(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI8_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI8_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <16 x i8> %shuffle
@@ -112,10 +85,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_9(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_10(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <16 x i8> %shuffle
@@ -124,10 +94,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_10(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_11(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <16 x i8> %shuffle
@@ -136,10 +103,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_11(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_12(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2, i32 3>
ret <16 x i8> %shuffle
@@ -148,10 +112,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_12(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_13(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI12_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI12_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1, i32 2>
ret <16 x i8> %shuffle
@@ -160,10 +121,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_13(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_14(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI13_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI13_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0, i32 1>
ret <16 x i8> %shuffle
@@ -172,10 +130,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_14(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsll_v_15(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsll_v_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI14_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI14_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 0>
ret <16 x i8> %shuffle
@@ -184,11 +139,7 @@ define <16 x i8> @shuffle_16i8_vbsll_v_15(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_2(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI15_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI15_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <8 x i16> %shuffle
@@ -197,11 +148,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_2(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_4(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI16_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI16_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <8 x i16> %shuffle
@@ -210,11 +157,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_4(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_6(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI17_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI17_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -223,11 +166,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_6(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_8(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI18_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI18_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %shuffle
@@ -236,11 +175,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_8(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_10(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI19_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI19_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 1, i32 2>
ret <8 x i16> %shuffle
@@ -249,11 +184,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_10(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_12(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI20_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI20_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0, i32 1>
ret <8 x i16> %shuffle
@@ -262,11 +193,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_12(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsll_v_14(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsll_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI21_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI21_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 0>
ret <8 x i16> %shuffle
@@ -275,11 +202,7 @@ define <8 x i16> @shuffle_8i16_vbsll_v_14(<8 x i16> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_4(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI22_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI22_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -288,11 +211,7 @@ define <4 x i32> @shuffle_4i32_vbsll_v_4(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_8(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI23_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI23_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 0, i32 1>
ret <4 x i32> %shuffle
@@ -301,11 +220,7 @@ define <4 x i32> @shuffle_4i32_vbsll_v_8(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsll_v_12(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsll_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI24_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI24_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 4, i32 4, i32 0>
ret <4 x i32> %shuffle
@@ -324,10 +239,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_1(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI26_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI26_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
ret <16 x i8> %shuffle
@@ -336,10 +248,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_1(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_2(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI27_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI27_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -348,10 +257,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_2(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI28_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI28_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -360,10 +266,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_3(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_4(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI29_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI29_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -372,10 +275,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_4(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_5(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_5:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI30_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI30_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 5
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -384,10 +284,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_5(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_6(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI31_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI31_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -396,10 +293,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_6(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_7(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_7:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI32_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI32_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 7
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -408,10 +302,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_7(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_8(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI33_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI33_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -420,10 +311,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_8(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_9(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_9:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI34_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI34_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -432,10 +320,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_9(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_10(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI35_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI35_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -444,10 +329,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_10(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_11(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_11:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI36_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI36_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 11
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -456,10 +338,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_11(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_12(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI37_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI37_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -468,10 +347,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_12(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_13(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_13:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI38_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI38_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 13
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 13, i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -480,10 +356,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_13(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_14(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI39_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI39_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 14, i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -492,10 +365,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_14(<16 x i8> %a) nounwind {
define <16 x i8> @shuffle_16i8_vbsrl_v_15(<16 x i8> %a) nounwind {
; CHECK-LABEL: shuffle_16i8_vbsrl_v_15:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI40_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI40_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.b $vr0, $vr2, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 15
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 15, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
ret <16 x i8> %shuffle
@@ -504,11 +374,7 @@ define <16 x i8> @shuffle_16i8_vbsrl_v_15(<16 x i8> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_2(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI41_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI41_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
ret <8 x i16> %shuffle
@@ -517,11 +383,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_2(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_4(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI42_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI42_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -530,11 +392,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_4(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_6(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_6:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI43_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI43_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -543,11 +401,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_6(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_8(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI44_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI44_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -556,11 +410,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_8(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_10(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_10:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI45_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI45_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 10
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -569,11 +419,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_10(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_12(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI46_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI46_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -582,11 +428,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_12(<8 x i16> %a) nounwind {
define <8 x i16> @shuffle_8i16_vbsrl_v_14(<8 x i16> %a) nounwind {
; CHECK-LABEL: shuffle_8i16_vbsrl_v_14:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI47_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI47_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.h $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 14
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> zeroinitializer, <8 x i32> <i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <8 x i16> %shuffle
@@ -595,11 +437,7 @@ define <8 x i16> @shuffle_8i16_vbsrl_v_14(<8 x i16> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_4(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_4:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI48_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI48_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -608,11 +446,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_4(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_8(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI49_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI49_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 3, i32 4, i32 4>
ret <4 x i32> %shuffle
@@ -621,11 +455,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_8(<4 x i32> %a) nounwind {
define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
; CHECK-LABEL: shuffle_4i32_vbsrl_v_12:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI50_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI50_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.w $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 12
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
ret <4 x i32> %shuffle
@@ -634,11 +464,7 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI51_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI51_0)
-; CHECK-NEXT: vrepli.b $vr2, 0
-; CHECK-NEXT: vshuf.d $vr1, $vr2, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
>From 6859a0ab2069a1d82e9d12f85a5de5745fdf71ca Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Tue, 25 Mar 2025 10:41:58 +0800
Subject: [PATCH 2/2] small change
---
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 63a7df8565c87..269921d80091e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -525,7 +525,7 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
// left or right with zeroable elements 'shifted in'. It handles both the
// strictly bit-wise element shifts and the byte shfit across an entire 128-bit
// lane.
-// This is mainly copy from X86.
+// Mostly copied from X86.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
unsigned ScalarSizeInBits, ArrayRef<int> Mask,
int MaskOffset, const APInt &Zeroable) {
More information about the llvm-commits
mailing list