[llvm] [LoongArch] Lower [x]vshuf.d to [x]vshuf4i.d if possible. (PR #137918)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 29 21:51:34 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/137918
From 95b21e14a3c63882f19261da1edca127976c3410 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 30 Apr 2025 11:06:48 +0800
Subject: [PATCH 1/2] Lower [x]vshuf.d to [x]vshuf4i.d if possible
---
.../LoongArch/LoongArchISelLowering.cpp | 46 ++++++++++++-------
.../LoongArch/LoongArchLASXInstrInfo.td | 4 ++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 10 ++++
.../ir-instruction/shuffle-as-xvshuf4i.ll | 20 ++++++++
.../lsx/ir-instruction/shuffle-as-vpack.ll | 8 ++--
.../lsx/ir-instruction/shuffle-as-vshuf.ll | 10 +---
.../lsx/ir-instruction/shuffle-as-vshuf4i.ll | 28 +++++++++--
.../LoongArch/lsx/vec-shuffle-byte-rotate.ll | 12 ++---
.../LoongArch/lsx/vec-shuffle-byte-shift.ll | 5 +-
9 files changed, 100 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d4e1d9c6f3ca6..3249e5af77e20 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -994,37 +994,39 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than 4, lower cost instructions may be used.
- if (Mask.size() < 4)
- return SDValue();
+ unsigned SubVecSize = 4;
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64) {
+ SubVecSize = 2;
+ }
int SubMask[4] = {-1, -1, -1, -1};
- for (unsigned i = 0; i < 4; ++i) {
- for (unsigned j = i; j < Mask.size(); j += 4) {
- int Idx = Mask[j];
+ for (unsigned i = 0; i < SubVecSize; ++i) {
+ for (unsigned j = i; j < Mask.size(); j += SubVecSize) {
+ int M = Mask[j];
// Convert from vector index to 4-element subvector index
// If an index refers to an element outside of the subvector then give up
- if (Idx != -1) {
- Idx -= 4 * (j / 4);
- if (Idx < 0 || Idx >= 4)
+ if (M != -1) {
+ M -= 4 * (j / SubVecSize);
+ if (M < 0 || M >= 4)
return SDValue();
}
// If the mask has an undef, replace it with the current index.
// Note that it might still be undef if the current index is also undef
if (SubMask[i] == -1)
- SubMask[i] = Idx;
+ SubMask[i] = M;
// Check that non-undef values are the same as in the mask. If they
// aren't then give up
- else if (Idx != -1 && Idx != SubMask[i])
+ else if (M != -1 && M != SubMask[i])
return SDValue();
}
}
// Calculate the immediate. Replace any remaining undefs with zero
APInt Imm(64, 0);
- for (int i = 3; i >= 0; --i) {
+ for (int i = SubVecSize - 1; i >= 0; --i) {
int Idx = SubMask[i];
if (Idx == -1)
@@ -1034,6 +1036,12 @@ static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
Imm |= Idx & 0x3;
}
+ // Return vshuf4i.d and xvshuf4i.d
+ if (VT == MVT::v2f64 || VT == MVT::v2i64 || VT == MVT::v4f64 ||
+ VT == MVT::v4i64)
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
+ DAG.getConstant(Imm, DL, MVT::i64));
+
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
DAG.getConstant(Imm, DL, MVT::i64));
}
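
For reference, the 8-bit vshuf4i.d immediate follows directly from the shuffle mask. Below is a minimal standalone sketch, not the in-tree code (the helper name vshuf4iDImm is invented for illustration), showing how the immediate for a <2 x i64> mask is derived; it matches the values the new LSX tests expect, e.g. mask <1, 2> encodes to 9 and <0, 3> to 12.

#include <array>
#include <cstdint>
#include <optional>

// Each result element gets a 2-bit selector: values 0/1 pick elements of the
// first source, 2/3 pick elements of the second. Element 0's selector lands in
// the low bits of the immediate, undef (-1) entries are encoded as 0, and any
// other out-of-range index means the mask is not representable.
std::optional<uint8_t> vshuf4iDImm(std::array<int, 2> Mask) {
  uint8_t Imm = 0;
  for (int i = 1; i >= 0; --i) {
    int Idx = Mask[i] == -1 ? 0 : Mask[i];
    if (Idx < 0 || Idx > 3)
      return std::nullopt; // Not expressible as a single vshuf4i.d.
    Imm = (Imm << 2) | (Idx & 0x3);
  }
  return Imm;
}

// Examples matching the tests in this patch:
//   {1, 2} -> 9, {0, 3} -> 12, {1, 3} -> 13
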
@@ -1343,6 +1351,11 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1413,10 +1426,6 @@ static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
SelectionDAG &DAG) {
- // When the size is less than or equal to 4, lower cost instructions may be
- // used.
- if (Mask.size() <= 4)
- return SDValue();
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
}
@@ -1784,6 +1793,11 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
+ if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
+ if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
+ }
+
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index e4268920e0b27..f943c2e4766e8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1729,6 +1729,10 @@ def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8),
(XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4i64:$xj, v4i64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4i64:$xj, v4i64:$xk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v4f64:$xj, v4f64:$xk, immZExt8:$ui8),
+ (XVSHUF4I_D v4f64:$xj, v4f64:$xk, immZExt8:$ui8)>;
// XVREPL128VEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v32i8:$xj, immZExt4:$ui4),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..ad5b49564f9cd 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,10 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVShuf4i_D
+ : SDTypeProfile<1, 3,
+ [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+ SDTCisVT<3, i64>]>;
def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -53,6 +57,8 @@ def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
+def loongarch_vshuf4i_d
+ : SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchVShuf4i_D>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
@@ -1914,6 +1920,10 @@ def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>;
def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8),
(VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2i64:$vj, v2i64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2i64:$vj, v2i64:$vk, immZExt8:$ui8)>;
+def : Pat<(loongarch_vshuf4i_d v2f64:$vj, v2f64:$vk, immZExt8:$ui8),
+ (VSHUF4I_D v2f64:$vj, v2f64:$vk, immZExt8:$ui8)>;
// VREPLVEI_{B/H/W/D}
def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index dc4532a7292ab..f3736f669db41 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -41,3 +41,23 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
%c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %c
}
+
+;; xvshuf4i.d
+define <4 x i64> @shufflevector_xvshuf4i_v4d64(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x i64> %c
+}
+
+;; xvshuf4i.d
+define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ ret <4 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 171e68306cd11..5882d43257df8 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
index ac78a26ba4367..d1c071b45ddff 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll
@@ -42,10 +42,7 @@ define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
ret <2 x i64> %c
@@ -68,10 +65,7 @@ define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: vld $vr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0
-; CHECK-NEXT: vori.b $vr0, $vr2, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 12
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
index 660b9581c3d1f..cd80dcb44e433 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
-;; vilvh.b
+;; vshuf4i.b
define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
; CHECK: # %bb.0:
@@ -11,7 +11,7 @@ define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
ret <16 x i8> %c
}
-;; vilvh.h
+;; vshuf4i.h
define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v8i4:
; CHECK: # %bb.0:
@@ -21,7 +21,7 @@ define <8 x i16> @shufflevector_vshuf4i_v8i4(<8 x i16> %a, <8 x i16> %b) {
ret <8 x i16> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
; CHECK: # %bb.0:
@@ -31,7 +31,7 @@ define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
ret <4 x i32> %c
}
-;; vilvh.w
+;; vshuf4i.w
define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
; CHECK: # %bb.0:
@@ -40,3 +40,23 @@ define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b)
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %c
}
+
+;; vshuf4i.d
+define <2 x i64> @shufflevector_vshuf4i_v2d64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2d64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %c
+}
+
+;; vshuf4i.d
+define <2 x double> @shufflevector_vshuf4i_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
index 7b2bb47424fee..b1e3f74cd1739 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-rotate.ll
@@ -103,9 +103,7 @@ define <4 x i32> @byte_rotate_v4i32_3(<4 x i32> %a) nounwind {
define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr1, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 0>
ret <2 x i64> %shuffle
@@ -114,9 +112,7 @@ define <2 x i64> @byte_rotate_v2i64_1(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr1, $vr1, 8
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
@@ -125,9 +121,7 @@ define <2 x i64> @byte_rotate_v2i64_2(<2 x i64> %a, <2 x i64> %b) nounwind {
define <2 x i64> @byte_rotate_v2i64_3(<2 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v2i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr1, $vr0, 8
-; CHECK-NEXT: vbsll.v $vr0, $vr0, 8
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
ret <2 x i64> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 720fe919601e6..8bf030e94d85d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle
@@ -464,7 +464,8 @@ define <4 x i32> @shuffle_4i32_vbsrl_v_12(<4 x i32> %a) nounwind {
define <2 x i64> @shuffle_2i64_vbsrl_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsrl_v_8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vbsrl.v $vr0, $vr0, 8
+; CHECK-NEXT: vrepli.b $vr1, 0
+; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2>
ret <2 x i64> %shuffle
From ee3de56caf9631493993f04272886505b082fd65 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Wed, 30 Apr 2025 12:43:42 +0800
Subject: [PATCH 2/2] Reorder the shuffle lowering operations
---
.../Target/LoongArch/LoongArchISelLowering.cpp | 16 ++++++----------
.../lsx/ir-instruction/shuffle-as-vpack.ll | 8 ++++----
.../LoongArch/lsx/vec-shuffle-byte-shift.ll | 2 +-
3 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 3249e5af77e20..edabd8e574607 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1351,11 +1351,6 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
- if (VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) {
- if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
- return Result;
- }
-
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
@@ -1370,6 +1365,9 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
return Result;
+ if ((VT.SimpleTy == MVT::v2i64 || VT.SimpleTy == MVT::v2f64) &&
+ (Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(DL, Mask, VT, V1, V2, DAG,
Zeroable)))
return Result;
@@ -1793,11 +1791,6 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/* V2 = V1; */
}
- if (VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) {
- if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
- return Result;
- }
-
// It is recommended not to change the pattern comparison order for better
// performance.
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
@@ -1812,6 +1805,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
return Result;
+ if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
+ (Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+ return Result;
if ((Result =
lowerVECTOR_SHUFFLEAsShift(DL, NewMask, VT, V1, V2, DAG, Zeroable)))
return Result;
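
To make the effect of the reordering concrete, here is a rough standalone sketch (the helper name expectedLowering is invented for illustration and it only covers masks exercised by the tests in this series): two-operand <2 x i64> masks that a dedicated pack instruction already handles keep that form, and vshuf4i.d is only chosen for masks none of the earlier lowerings can express.

#include <array>
#include <string>

// Which instruction the reordered 128-bit path is expected to pick for a few
// representative <2 x i64> masks, per the tests updated in this series.
std::string expectedLowering(const std::array<int, 2> &Mask) {
  if (Mask == std::array<int, 2>{0, 2})
    return "vpackev.d"; // even elements of both sources; tried before vshuf4i
  if (Mask == std::array<int, 2>{1, 3})
    return "vpackod.d"; // odd elements of both sources; tried before vshuf4i
  if (Mask == std::array<int, 2>{1, 2})
    return "vshuf4i.d, imm 9"; // no pack/pick/interleave pattern matches
  if (Mask == std::array<int, 2>{0, 3})
    return "vshuf4i.d, imm 12";
  return "something else"; // left to the remaining lowerings
}
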
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
index 5882d43257df8..171e68306cd11 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll
@@ -35,7 +35,7 @@ define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
+; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %c
@@ -55,7 +55,7 @@ define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflevector_pack_ev_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 8
+; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
ret <2 x double> %c
@@ -95,7 +95,7 @@ define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
define <2 x i64> @shufflodector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
+; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %c
@@ -115,7 +115,7 @@ define <4 x float> @shufflodector_pack_od_v4f32(<4 x float> %a, <4 x float> %b)
define <2 x double> @shufflodector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: shufflodector_pack_od_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 13
+; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
; CHECK-NEXT: ret
%c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
ret <2 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
index 8bf030e94d85d..ff0f252ba2bdf 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-shuffle-byte-shift.ll
@@ -230,7 +230,7 @@ define <2 x i64> @shuffle_2i64_vbsll_v_8(<2 x i64> %a) nounwind {
; CHECK-LABEL: shuffle_2i64_vbsll_v_8:
; CHECK: # %bb.0:
; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 2
+; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1
; CHECK-NEXT: ret
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
ret <2 x i64> %shuffle