[llvm] ebf9883 - [LoongArch] Lower vector shuffle as byte rotate (if possible) (#135157)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 20:08:21 PDT 2025
Author: tangaac
Date: 2025-04-17T11:08:18+08:00
New Revision: ebf9883b82aed98aacdfc77824655fec8dc46988
URL: https://github.com/llvm/llvm-project/commit/ebf9883b82aed98aacdfc77824655fec8dc46988
DIFF: https://github.com/llvm/llvm-project/commit/ebf9883b82aed98aacdfc77824655fec8dc46988.diff
LOG: [LoongArch] Lower vector shuffle as byte rotate (if possible) (#135157)
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f72b55e1d175c..7e37bbd652114 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -714,6 +714,139 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
}
}
+/// Test whether a shuffle mask is equivalent within each sub-lane.
+///
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// suitable for use with existing 128-bit shuffles as entries from the second
+/// vector have been remapped to [LaneSize, 2*LaneSize).
+static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
+ ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ RepeatedMask.assign(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ assert(Mask[i] == -1 || Mask[i] >= 0);
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ // Adjust second vector indices to start at LaneSize instead of Size.
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
+ if (RepeatedMask[i % LaneSize] < 0)
+ // This is the first non-undef entry in this slot of a 128-bit lane.
+ RepeatedMask[i % LaneSize] = LocalM;
+ else if (RepeatedMask[i % LaneSize] != LocalM)
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
+
+/// Attempts to match vector shuffle as byte rotation.
+static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+
+ SDValue Lo, Hi;
+ SmallVector<int, 16> RepeatedMask;
+
+ if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
+ return -1;
+
+ int NumElts = RepeatedMask.size();
+ int Rotation = 0;
+ int Scale = 16 / NumElts;
+
+ for (int i = 0; i < NumElts; ++i) {
+ int M = RepeatedMask[i];
+ assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
+ "Unexpected mask index.");
+ if (M < 0)
+ continue;
+
+ // Determine where a rotated vector would have started.
+ int StartIdx = i - (M % NumElts);
+ if (StartIdx == 0)
+ return -1;
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ return -1;
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = M < NumElts ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned
+ // to. This reflects whether the high elements are remaining or the low
+ // elements are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ return -1;
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((Lo || Hi) && "Failed to find a rotated input vector!");
+ if (!Lo)
+ Lo = Hi;
+ else if (!Hi)
+ Hi = Lo;
+
+ V1 = Lo;
+ V2 = Hi;
+
+ return Rotation * Scale;
+}
+
+/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
+///
+/// For example:
+/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
+/// <2 x i32> <i32 3, i32 0>
+/// is lowered to:
+/// (VBSRL_V $v1, $v1, 8)
+/// (VBSLL_V $v0, $v0, 8)
+/// (VOR_V $v0, $v0, $v1)
+static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
+ ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+
+ SDValue Lo = V1, Hi = V2;
+ int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
+ if (ByteRotation <= 0)
+ return SDValue();
+
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ Lo = DAG.getBitcast(ByteVT, Lo);
+ Hi = DAG.getBitcast(ByteVT, Hi);
+
+ int LoByteShift = 16 - ByteRotation;
+ int HiByteShift = ByteRotation;
+
+ SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
+ DAG.getConstant(LoByteShift, DL, MVT::i64));
+ SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
+ DAG.getConstant(HiByteShift, DL, MVT::i64));
+ return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
+}
+
/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
///
/// For example:
@@ -1230,6 +1363,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
return Result;
return SDValue();
@@ -1666,6 +1801,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index 88d9037e3a9e9..b697a2fd07435 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -6,9 +6,9 @@
define <32 x i8> @byte_rotate_v32_i8_1(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: byte_rotate_v32_i8_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 15
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
ret <32 x i8> %shuffle
@@ -17,9 +17,9 @@ define <32 x i8> @byte_rotate_v32_i8_1(<32 x i8> %a, <32 x i8> %b) {
define <32 x i8> @byte_rotate_v32_i8_2(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: byte_rotate_v32_i8_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 13
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 3
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 45, i32 46, i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 61, i32 62, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <32 x i8> %shuffle
@@ -28,9 +28,9 @@ define <32 x i8> @byte_rotate_v32_i8_2(<32 x i8> %a, <32 x i8> %b) {
define <32 x i8> @byte_rotate_v32_i8_3(<32 x i8> %a) {
; CHECK-LABEL: byte_rotate_v32_i8_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 1
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 15
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16>
ret <32 x i8> %shuffle
@@ -40,10 +40,9 @@ define <32 x i8> @byte_rotate_v32_i8_3(<32 x i8> %a) {
define <16 x i16> @byte_rotate_v16i16_1(<16 x i16> %a, <16 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 6
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 10
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
ret <16 x i16> %shuffle
@@ -52,10 +51,9 @@ define <16 x i16> @byte_rotate_v16i16_1(<16 x i16> %a, <16 x i16> %b) nounwind {
define <16 x i16> @byte_rotate_v16i16_2(<16 x i16> %a, <16 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 10
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 6
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2,i32 3, i32 4, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i16> %shuffle
@@ -64,10 +62,9 @@ define <16 x i16> @byte_rotate_v16i16_2(<16 x i16> %a, <16 x i16> %b) nounwind {
define <16 x i16> @byte_rotate_v16i16_3(<16 x i16> %a) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 6
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 10
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
@@ -76,10 +73,9 @@ define <16 x i16> @byte_rotate_v16i16_3(<16 x i16> %a) nounwind {
define <8 x i32> @byte_rotate_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i32_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 4
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 12
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
ret <8 x i32> %shuffle
@@ -88,10 +84,9 @@ define <8 x i32> @byte_rotate_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind {
define <8 x i32> @byte_rotate_v8i32_2(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i32_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 12
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 4
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
ret <8 x i32> %shuffle
@@ -109,10 +104,9 @@ define <8 x i32> @byte_rotate_v8i32_3(<8 x i32> %a) nounwind {
define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x i64> %shuffle
@@ -121,10 +115,9 @@ define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
ret <4 x i64> %shuffle
@@ -133,10 +126,9 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index eb92d236f43a0..7b2bb47424fee 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -6,9 +6,9 @@
define <16 x i8> @byte_rotate_v16i8_1(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 5
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 11
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
ret <16 x i8> %shuffle
@@ -17,9 +17,9 @@ define <16 x i8> @byte_rotate_v16i8_1(<16 x i8> %a, <16 x i8> %b) nounwind {
define <16 x i8> @byte_rotate_v16i8_2(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 11
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -28,9 +28,9 @@ define <16 x i8> @byte_rotate_v16i8_2(<16 x i8> %a, <16 x i8> %b) nounwind {
define <16 x i8> @byte_rotate_v16i8_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 11
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -39,10 +39,9 @@ define <16 x i8> @byte_rotate_v16i8_3(<16 x i8> %a) nounwind {
define <8 x i16> @byte_rotate_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 10
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -51,10 +50,9 @@ define <8 x i16> @byte_rotate_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind {
define <8 x i16> @byte_rotate_v8i16_2(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 10
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %shuffle
@@ -63,10 +61,9 @@ define <8 x i16> @byte_rotate_v8i16_2(<8 x i16> %a, <8 x i16> %b) nounwind {
define <8 x i16> @byte_rotate_v8i16_3(<8 x i16> %a) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 10
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -75,10 +72,9 @@ define <8 x i16> @byte_rotate_v8i16_3(<8 x i16> %a) nounwind {
define <4 x i32> @byte_rotate_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i32_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 12
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -87,10 +83,9 @@ define <4 x i32> @byte_rotate_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
define <4 x i32> @byte_rotate_v4i32_2(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i32_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 12
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -108,10 +103,9 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
@@ -120,10 +114,9 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
@@ -132,10 +125,9 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
More information about the llvm-commits
mailing list