[llvm] [LoongArch] lower vector shuffle as byte rotate (if possible) (PR #135157)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 16 18:43:58 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/135157
>From b051119870bdecf2f29bcdcd709f659675f291b6 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Thu, 10 Apr 2025 18:35:09 +0800
Subject: [PATCH 1/2] lower vector shuffle as byte rotate (if possible)
---
.../LoongArch/LoongArchISelLowering.cpp | 138 ++++++++++++++++++
.../LoongArch/lasx/vec-shuffle-byte-rotate.ll | 74 +++++-----
.../LoongArch/lsx/vec-shuffle-byte-rotate.ll | 74 +++++-----
3 files changed, 204 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f72b55e1d175c..334ba193fed5e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -714,6 +714,140 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
}
}
+/// Test whether a shuffle mask is equivalent within each sub-lane.
+///
+/// The specific repeated shuffle mask is populated in \p RepeatedMask, as it is
+/// non-trivial to compute in the face of undef lanes. The representation is
+/// suitable for use with existing 128-bit shuffles as entries from the second
+/// vector have been remapped to [LaneSize, 2*LaneSize).
+static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
+ ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ RepeatedMask.assign(LaneSize, -1);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ assert(Mask[i] == -1 || Mask[i] >= 0);
+ if (Mask[i] < 0)
+ continue;
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
+
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ // Adjust second vector indices to start at LaneSize instead of Size.
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
+ if (RepeatedMask[i % LaneSize] < 0)
+ // This is the first non-undef entry in this slot of a 128-bit lane.
+ RepeatedMask[i % LaneSize] = LocalM;
+ else if (RepeatedMask[i % LaneSize] != LocalM)
+ // Found a mismatch with the repeated mask.
+ return false;
+ }
+ return true;
+}
+
+/// Attempts to match vector shuffle as byte rotation.
+static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+
+ SDValue Lo, Hi;
+ SmallVector<int, 16> RepeatedMask;
+
+ if (!isRepeatedShuffleMask(128, VT, Mask, RepeatedMask))
+ return -1;
+
+ int NumElts = RepeatedMask.size();
+ int Rotation = 0;
+ int Scale = 16 / NumElts;
+
+ for (int i = 0; i < NumElts; ++i) {
+ int M = RepeatedMask[i];
+ assert((M == -1 || (0 <= M && M < (2 * NumElts))) &&
+ "Unexpected mask index.");
+ if (M < 0)
+ continue;
+
+ // Determine where a rotated vector would have started.
+ int StartIdx = i - (M % NumElts);
+ if (StartIdx == 0)
+ return -1;
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ // int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
+ int CandidateRotation = (NumElts - StartIdx) % NumElts;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ return -1;
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = M < NumElts ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned
+ // to. This reflects whether the high elements are remaining or the low
+ // elements are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ return -1;
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((Lo || Hi) && "Failed to find a rotated input vector!");
+ if (!Lo)
+ Lo = Hi;
+ else if (!Hi)
+ Hi = Lo;
+
+ V1 = Lo;
+ V2 = Hi;
+
+ return Rotation * Scale;
+}
+
+/// Lower VECTOR_SHUFFLE as byte rotate (if possible).
+///
+/// For example:
+/// %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b,
+/// <2 x i32> <i32 3, i32 0>
+/// is lowered to:
+/// (VBSRL_V $v1, $v1, 8)
+/// (VBSLL_V $v0, $v0, 8)
+/// (VOR_V $v0, $v0, $v1)
+static SDValue lowerVECTOR_SHUFFLEAsByteRotate(const SDLoc &DL,
+ ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2,
+ SelectionDAG &DAG) {
+
+ SDValue Lo = V1, Hi = V2;
+ int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
+ if (ByteRotation <= 0)
+ return SDValue();
+
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
+ Lo = DAG.getBitcast(ByteVT, Lo);
+ Hi = DAG.getBitcast(ByteVT, Hi);
+
+ int LoByteShift = 16 - ByteRotation;
+ int HiByteShift = ByteRotation;
+
+ SDValue LoShift = DAG.getNode(LoongArchISD::VBSLL, DL, ByteVT, Lo,
+ DAG.getConstant(LoByteShift, DL, MVT::i64));
+ SDValue HiShift = DAG.getNode(LoongArchISD::VBSRL, DL, ByteVT, Hi,
+ DAG.getConstant(HiByteShift, DL, MVT::i64));
+ return DAG.getBitcast(VT, DAG.getNode(ISD::OR, DL, ByteVT, LoShift, HiShift));
+}
+
/// Lower VECTOR_SHUFFLE as ZERO_EXTEND Or ANY_EXTEND (if possible).
///
/// For example:
@@ -1230,6 +1364,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Zeroable)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
return Result;
return SDValue();
@@ -1666,6 +1802,8 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
return Result;
+ if ((Result = lowerVECTOR_SHUFFLEAsByteRotate(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index 88d9037e3a9e9..b697a2fd07435 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -6,9 +6,9 @@
define <32 x i8> @byte_rotate_v32_i8_1(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: byte_rotate_v32_i8_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 15
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
ret <32 x i8> %shuffle
@@ -17,9 +17,9 @@ define <32 x i8> @byte_rotate_v32_i8_1(<32 x i8> %a, <32 x i8> %b) {
define <32 x i8> @byte_rotate_v32_i8_2(<32 x i8> %a, <32 x i8> %b) {
; CHECK-LABEL: byte_rotate_v32_i8_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 13
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 3
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 45, i32 46, i32 47, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 61, i32 62, i32 63, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
ret <32 x i8> %shuffle
@@ -28,9 +28,9 @@ define <32 x i8> @byte_rotate_v32_i8_2(<32 x i8> %a, <32 x i8> %b) {
define <32 x i8> @byte_rotate_v32_i8_3(<32 x i8> %a) {
; CHECK-LABEL: byte_rotate_v32_i8_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 1
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 15
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <32 x i8> %a, <32 x i8> poison, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16>
ret <32 x i8> %shuffle
@@ -40,10 +40,9 @@ define <32 x i8> @byte_rotate_v32_i8_3(<32 x i8> %a) {
define <16 x i16> @byte_rotate_v16i16_1(<16 x i16> %a, <16 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 6
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 10
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 14, i32 15, i32 24, i32 25, i32 26>
ret <16 x i16> %shuffle
@@ -52,10 +51,9 @@ define <16 x i16> @byte_rotate_v16i16_1(<16 x i16> %a, <16 x i16> %b) nounwind {
define <16 x i16> @byte_rotate_v16i16_2(<16 x i16> %a, <16 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: xvshuf.h $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 10
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 6
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 21, i32 22, i32 23, i32 0, i32 1, i32 2,i32 3, i32 4, i32 29, i32 30, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12>
ret <16 x i16> %shuffle
@@ -64,10 +62,9 @@ define <16 x i16> @byte_rotate_v16i16_2(<16 x i16> %a, <16 x i16> %b) nounwind {
define <16 x i16> @byte_rotate_v16i16_3(<16 x i16> %a) nounwind {
; CHECK-LABEL: byte_rotate_v16i16_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 6
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 10
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i16> %a, <16 x i16> poison, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10>
ret <16 x i16> %shuffle
@@ -76,10 +73,9 @@ define <16 x i16> @byte_rotate_v16i16_3(<16 x i16> %a) nounwind {
define <8 x i32> @byte_rotate_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i32_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 4
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 12
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
ret <8 x i32> %shuffle
@@ -88,10 +84,9 @@ define <8 x i32> @byte_rotate_v8i32_1(<8 x i32> %a, <8 x i32> %b) nounwind {
define <8 x i32> @byte_rotate_v8i32_2(<8 x i32> %a, <8 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i32_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 12
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 4
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
ret <8 x i32> %shuffle
@@ -109,10 +104,9 @@ define <8 x i32> @byte_rotate_v8i32_3(<8 x i32> %a) nounwind {
define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
+; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8
+; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x i64> %shuffle
@@ -121,10 +115,9 @@ define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
ret <4 x i64> %shuffle
@@ -133,10 +126,9 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
+; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
+; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index eb92d236f43a0..7b2bb47424fee 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -6,9 +6,9 @@
define <16 x i8> @byte_rotate_v16i8_1(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 5
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 11
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
ret <16 x i8> %shuffle
@@ -17,9 +17,9 @@ define <16 x i8> @byte_rotate_v16i8_1(<16 x i8> %a, <16 x i8> %b) nounwind {
define <16 x i8> @byte_rotate_v16i8_2(<16 x i8> %a, <16 x i8> %b) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 11
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -28,9 +28,9 @@ define <16 x i8> @byte_rotate_v16i8_2(<16 x i8> %a, <16 x i8> %b) nounwind {
define <16 x i8> @byte_rotate_v16i8_3(<16 x i8> %a) nounwind {
; CHECK-LABEL: byte_rotate_v16i8_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 11
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 5
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <16 x i8> %a, <16 x i8> poison, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <16 x i8> %shuffle
@@ -39,10 +39,9 @@ define <16 x i8> @byte_rotate_v16i8_3(<16 x i8> %a) nounwind {
define <8 x i16> @byte_rotate_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 10
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -51,10 +50,9 @@ define <8 x i16> @byte_rotate_v8i16_1(<8 x i16> %a, <8 x i16> %b) nounwind {
define <8 x i16> @byte_rotate_v8i16_2(<8 x i16> %a, <8 x i16> %b) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI4_0)
-; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 6
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 10
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x i16> %shuffle
@@ -63,10 +61,9 @@ define <8 x i16> @byte_rotate_v8i16_2(<8 x i16> %a, <8 x i16> %b) nounwind {
define <8 x i16> @byte_rotate_v8i16_3(<8 x i16> %a) nounwind {
; CHECK-LABEL: byte_rotate_v8i16_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 10
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 6
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i16> %shuffle
@@ -75,10 +72,9 @@ define <8 x i16> @byte_rotate_v8i16_3(<8 x i16> %a) nounwind {
define <4 x i32> @byte_rotate_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i32_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 12
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 4
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %shuffle
@@ -87,10 +83,9 @@ define <4 x i32> @byte_rotate_v4i32_1(<4 x i32> %a, <4 x i32> %b) nounwind {
define <4 x i32> @byte_rotate_v4i32_2(<4 x i32> %a, <4 x i32> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i32_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 4
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 12
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %shuffle
@@ -108,10 +103,9 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI9_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI9_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
@@ -120,10 +114,9 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
+; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
@@ -132,10 +125,9 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: vshuf.d $vr1, $vr0, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr1, 0
+; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
+; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
+; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
>From 2b6423cfbe8fbcee139f9f3c052ff8f9fc8ed5c0 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Thu, 17 Apr 2025 09:32:49 +0800
Subject: [PATCH 2/2] small change
---
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 334ba193fed5e..7e37bbd652114 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -777,8 +777,7 @@ static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
// If we found the tail of a vector the rotation must be the missing
// front. If we found the head of a vector, it must be how much of the
// head.
- // int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
- int CandidateRotation = (NumElts - StartIdx) % NumElts;
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
if (Rotation == 0)
Rotation = CandidateRotation;
More information about the llvm-commits
mailing list