[llvm] [LoongArch] Custom legalize vector_shuffle to `xvshuf4i.d` (PR #164213)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 20 00:40:50 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/164213
None
>From 4af391cb3c12ed28f8126f09eaea455e0feac3b0 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Mon, 20 Oct 2025 15:36:05 +0800
Subject: [PATCH] [LoongArch] Custom legalize vector_shuffle to `xvshuf4i.d`
---
.../LoongArch/LoongArchISelLowering.cpp | 32 ++++++++++++++++---
.../ir-instruction/shuffle-as-xvshuf4i.ll | 9 ++----
.../LoongArch/lasx/vec-shuffle-byte-rotate.ll | 10 ++----
3 files changed, 33 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeafc9ccfc..831f48de67c70 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2091,10 +2091,30 @@ static SDValue
lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
SDValue V1, SDValue V2, SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget) {
- // When the size is less than or equal to 4, lower cost instructions may be
- // used.
- if (Mask.size() <= 4)
- return SDValue();
+ // XVSHUF4I_D must be handled separately: unlike the other [X]VSHUF4I forms,
+ // each 2-bit immediate field selects an element from either V1 or V2.
+ if (Mask.size() == 4) {
+ unsigned MaskImm = 0;
+ for (int i = 1; i >= 0; --i) {
+ int MLo = Mask[i];
+ int MHi = Mask[i + 2];
+ if (!(MLo == -1 || (MLo >= 0 && MLo <= 1) || (MLo >= 4 && MLo <= 5)) ||
+ !(MHi == -1 || (MHi >= 2 && MHi <= 3) || (MHi >= 6 && MHi <= 7)))
+ return SDValue();
+ if (MHi != -1 && MLo != -1 && MHi != MLo + 2)
+ return SDValue();
+
+ MaskImm <<= 2;
+ if (MLo != -1)
+ MaskImm |= ((MLo <= 1) ? MLo : (MLo - 2)) & 0x3;
+ else if (MHi != -1)
+ MaskImm |= ((MHi <= 3) ? (MHi - 2) : (MHi - 4)) & 0x3;
+ }
+
+ return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1, V2,
+ DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+ }
+
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
}
@@ -2639,6 +2659,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
return Result;
+ if ((VT.SimpleTy == MVT::v4i64 || VT.SimpleTy == MVT::v4f64) &&
+ (Result =
+ lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index 69437a24282b2..a5f876999b1e9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -37,9 +37,7 @@ define <8 x i32> @shufflevector_xvshuf4i_v8i32(<8 x i32> %a, <8 x i32> %b) {
define <4 x i64> @shufflevector_xvshuf4i_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: shufflevector_xvshuf4i_v4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
-; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
; CHECK-NEXT: ret
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x i64> %c
@@ -59,10 +57,7 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 6
; CHECK-NEXT: ret
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x double> %c
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index 2007f851129e8..9dfa5030ecfa1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -105,9 +105,7 @@ define <8 x i32> @byte_rotate_v8i32_3(<8 x i32> %a) nounwind {
define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 8
-; CHECK-NEXT: xvbsll.v $xr1, $xr1, 8
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 9
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
ret <4 x i64> %shuffle
@@ -116,9 +114,7 @@ define <4 x i64> @byte_rotate_v4i64_1(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvbsrl.v $xr1, $xr1, 8
-; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr1, 3
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
ret <4 x i64> %shuffle
@@ -127,7 +123,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177
+; CHECK-NEXT: xvshuf4i.d $xr0, $xr0, 1
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i64> %shuffle
More information about the llvm-commits mailing list