[llvm] [LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. (PR #137918)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 19:59:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: None (tangaac)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/137918.diff
9 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+30-16)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+4)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+6)
- (modified) llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll (+20)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll (+4-4)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll (+2-8)
- (modified) llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll (+24-4)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll (+3-9)
- (modified) llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll (+3-2)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..4e79d1bd39387 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than 4, lower cost instructions may be used.
- if (Mask.size() < 4)
- return SDValue();
+ unsigned SubVecSize = 4;
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64) {
+ SubVecSize = 2;
+ }
int SubMask[4] = {-1, -1, -1, -1};
- for (unsigned i = 0; i < 4; ++i) {
- for (unsigned j = i; j < Mask.size(); j += 4) {
- int Idx = Mask[j];
+ for (unsigned i = 0; i < SubVecSize; ++i) {
+ for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
+ int M = Mask[j];
// Convert from vector index to 4-element subvector index
// If an index refers to an element outside of the subvector then give up
- if (Idx != -1) {
- Idx -= 4 * (j / 4);
- if (Idx < 0 || Idx >= 4)
+ if (M != -1) {
+ M -= 4 * (j / SubVecSize);
+ if (M < 0 || M >= 4)
return SDValue();
}
// If the mask has an undef, replace it with the current index.
// Note that it might still be undef if the current index is also undef
if (SubMask[i] == -1)
- SubMask[i] = Idx;
+ SubMask[i] = M;
// Check that non-undef values are the same as in the mask. If they
// aren't then give up
- else if (Idx != -1 && Idx != SubMask[i])
+ else if (M != -1 && M != SubMask[i])
return SDValue();
}
}
// Calculate the immediate. Replace any remaining undefs with zero
APInt Imm(64, 0);
- for (int i = 3; i >= 0; --i) {
+ for (int i = SubVecSize-1; i >= 0; --i) {
int Idx = SubMask[i];
if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
Imm |= Idx & 0x3;
}
+ // Return vshuf4i.d and xvshuf4i.d
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64)
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+ V2, DAG.getConstant(Imm, DL, MVT::i64));
+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
DAG.getConstant(Imm, DL, MVT::i64));
}
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than or equal to 4, lower cost instructions may be
- // used.
- if (Mask.size() <= 4)
- return SDValue();
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e4268920e0b27..fcc2cac8d0766 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
// XVREPL128VEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..241e835721fb2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVShuf4i_D : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1,2>, SDTCisVT<3, i64>]>;
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +54,7 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
+def loongarch_vshuf4i_d: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
@@ -1914,6 +1916,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
// VREPLVEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index dc4532a7292ab..f3736f669db41 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %c
}
+
+;; xvshuf4i.d
+define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x i64> %c
+}
+
+;; xvshuf4i.d
+define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 171e68306cd11..5882d43257df8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index ac78a26ba4367..d1c071b45ddff 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index 660b9581c3d1f..cd80dcb44e433 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
-;; vilvh.b
+;; vshuf4i.b
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
ret <16 x i8> %c
}
-;; vilvh.h
+;; vshuf4i.h
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %c
}
+
+;; vshuf4i.d
+define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %c
+}
+
+;; vshuf4i.d
+define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index 7b2bb47424fee..b1e3f74cd1739 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 720fe919601e6..8bf030e94d85d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
``````````
</details>
https://github.com/llvm/llvm-project/pull/137918
More information about the llvm-commits mailing list