[llvm] beed796 - [LoongArch] Custom legalize vector_shuffle to xvpermi.d when possible (#160429)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 00:34:14 PDT 2025
Author: ZhaoQi
Date: 2025-09-26T15:34:10+08:00
New Revision: beed796ea02a54ec44a30db092e3f5d045390434
URL: https://github.com/llvm/llvm-project/commit/beed796ea02a54ec44a30db092e3f5d045390434
DIFF: https://github.com/llvm/llvm-project/commit/beed796ea02a54ec44a30db092e3f5d045390434.diff
LOG: [LoongArch] Custom legalize vector_shuffle to xvpermi.d when possible (#160429)
Added:
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7bf6493046882..5d4a8fd080202 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -1603,7 +1603,7 @@ static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend(const SDLoc &DL,
/// value is necessary in order to fit the above form.
static SDValue
lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
- SDValue V1, SDValue V2, SelectionDAG &DAG,
+ SDValue V1, SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget) {
int SplatIndex = -1;
for (const auto &M : Mask) {
@@ -1996,8 +1996,8 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
SDValue Result;
// TODO: Add more comparison patterns.
if (V2.isUndef()) {
- if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG,
- Subtarget)))
+ if ((Result =
+ lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
if ((Result =
lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget)))
@@ -2053,7 +2053,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
/// value is necessary in order to fit the above form.
static SDValue
lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
- SDValue V1, SDValue V2, SelectionDAG &DAG,
+ SDValue V1, SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget) {
int SplatIndex = -1;
for (const auto &M : Mask) {
@@ -2096,10 +2096,29 @@ lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG, Subtarget);
}
+/// Lower VECTOR_SHUFFLE into XVPERMI (if possible). Returns an empty SDValue unless the shuffle is a 4-element v4i64/v4f64 one.
+static SDValue
+lowerVECTOR_SHUFFLE_XVPERMI(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ // Only consider XVPERMI_D: bail out for any other mask length or vector type.
+ if (Mask.size() != 4 || (VT != MVT::v4i64 && VT != MVT::v4f64))
+ return SDValue();
+ // Pack the mask into the immediate, 2 bits per result element: e.g. <3, 1, 2, 0> -> 3 | 1<<2 | 2<<4 | 0<<6 = 39.
+ unsigned MaskImm = 0;
+ for (unsigned i = 0; i < Mask.size(); ++i) {
+ if (Mask[i] == -1) // -1 entries (undef lanes in LLVM shuffle masks) are skipped; their field stays 0.
+ continue;
+ MaskImm |= Mask[i] << (i * 2); // NOTE(review): assumes Mask[i] is in [0, 3]; not checked here -- confirm callers guarantee it.
+ }
+ // Emit XVPERMI on V1 alone, passing the packed immediate as a constant of the subtarget's GRLen type.
+ return DAG.getNode(LoongArchISD::XVPERMI, DL, VT, V1,
+ DAG.getConstant(MaskImm, DL, Subtarget.getGRLenVT()));
+}
+
/// Lower VECTOR_SHUFFLE into XVPERM (if possible).
static SDValue lowerVECTOR_SHUFFLE_XVPERM(const SDLoc &DL, ArrayRef<int> Mask,
- MVT VT, SDValue V1, SDValue V2,
- SelectionDAG &DAG,
+ MVT VT, SDValue V1, SelectionDAG &DAG,
const LoongArchSubtarget &Subtarget) {
// LoongArch LASX only have XVPERM_W.
if (Mask.size() != 8 || (VT != MVT::v8i32 && VT != MVT::v8f32))
@@ -2540,14 +2559,16 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
SDValue Result;
// TODO: Add more comparison patterns.
if (V2.isUndef()) {
- if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, V2, DAG,
- Subtarget)))
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVREPLVEI(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, Mask, VT, V1, V2, DAG,
Subtarget)))
return Result;
if ((Result =
- lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ lowerVECTOR_SHUFFLE_XVPERMI(DL, Mask, VT, V1, DAG, Subtarget)))
+ return Result;
+ if ((Result = lowerVECTOR_SHUFFLE_XVPERM(DL, Mask, VT, V1, DAG, Subtarget)))
return Result;
// TODO: This comment may be enabled in the future to better match the
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index 30539427a1a0a..0b8015ddbdd4a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,13 +7,12 @@
define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve.d $xr2, $xr1, 3
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 238
-; CHECK-NEXT: xvrepl128vei.d $xr3, $xr3, 1
-; CHECK-NEXT: vextrins.d $vr3, $vr2, 16
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 3
+; CHECK-NEXT: xvpickve.d $xr3, $xr1, 3
+; CHECK-NEXT: vextrins.d $vr2, $vr3, 16
; CHECK-NEXT: xvpickve.d $xr1, $xr1, 2
; CHECK-NEXT: vextrins.d $vr0, $vr1, 16
-; CHECK-NEXT: xvpermi.q $xr0, $xr3, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2
; CHECK-NEXT: ret
entry:
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 6, i32 3, i32 7>
diff --git a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
index 24f1b31702b71..245f76472b844 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/shuffle-as-permute-and-shuffle.ll
@@ -6,11 +6,8 @@ define <32 x i8> @shuffle_v32i8(<32 x i8> %a) {
; CHECK-LABEL: shuffle_v32i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI0_0)
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.h $xr1, $xr2, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
@@ -34,11 +31,8 @@ define <16 x i16> @shuffle_v16i16(<16 x i16> %a) {
; CHECK-LABEL: shuffle_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
+; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI2_0)
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78
; CHECK-NEXT: xvshuf.w $xr1, $xr2, $xr0
; CHECK-NEXT: xvori.b $xr0, $xr1, 0
; CHECK-NEXT: ret
@@ -72,10 +66,7 @@ define <8 x i32> @shuffle_v8i32(<8 x i32> %a) {
define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
; CHECK-LABEL: shuffle_v8i32_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
; CHECK-NEXT: ret
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7>
ret <8 x i32> %shuffle
@@ -84,14 +75,7 @@ define <8 x i32> @shuffle_v8i32_same_lane(<8 x i32> %a) {
define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
; CHECK-LABEL: shuffle_v4i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI6_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI6_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI6_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
ret <4 x i64> %shuffle
@@ -100,10 +84,7 @@ define <4 x i64> @shuffle_v4i64(<4 x i64> %a) {
define <4 x i64> @shuffle_v4i64_same_lane(<4 x i64> %a) {
; CHECK-LABEL: shuffle_v4i64_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI7_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI7_0)
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 225
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
ret <4 x i64> %shuffle
@@ -136,14 +117,7 @@ define <8 x float> @shuffle_v8f32_same_lane(<8 x float> %a) {
define <4 x double> @shuffle_v4f64(<4 x double> %a) {
; CHECK-LABEL: shuffle_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI10_0)
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI10_1)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI10_1)
-; CHECK-NEXT: xvpermi.d $xr3, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr2, $xr0, $xr3
-; CHECK-NEXT: xvshuf.d $xr1, $xr2, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 39
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 1, i32 2, i32 0>
ret <4 x double> %shuffle
@@ -152,11 +126,7 @@ define <4 x double> @shuffle_v4f64(<4 x double> %a) {
define <4 x double> @shuffle_v4f64_same_lane(<4 x double> %a) {
; CHECK-LABEL: shuffle_v4f64_same_lane:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI11_0)
-; CHECK-NEXT: xvld $xr1, $a0, %pc_lo12(.LCPI11_0)
-; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78
-; CHECK-NEXT: xvshuf.d $xr1, $xr0, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr1, 0
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 75
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 0, i32 1>
ret <4 x double> %shuffle
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
index c0fa734034114..2007f851129e8 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-shuffle-byte-rotate.ll
@@ -127,9 +127,7 @@ define <4 x i64> @byte_rotate_v4i64_2(<4 x i64> %a, <4 x i64> %b) nounwind {
define <4 x i64> @byte_rotate_v4i64_3(<4 x i64> %a) nounwind {
; CHECK-LABEL: byte_rotate_v4i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: xvbsrl.v $xr1, $xr0, 8
-; CHECK-NEXT: xvbsll.v $xr0, $xr0, 8
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 177
; CHECK-NEXT: ret
%shuffle = shufflevector <4 x i64> %a, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
ret <4 x i64> %shuffle
More information about the llvm-commits
mailing list