[llvm-branch-commits] [llvm] [LoongArch] Custom legalize vector_shuffle to `xvextrins` (PR #164375)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Oct 21 01:19:19 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/164375
None
>From db2859536c8496ef7225cb9eeb7310d13d4c008c Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 21 Oct 2025 16:07:29 +0800
Subject: [PATCH 1/2] [LoongArch] Custom legalize vector_shuffle to `xvextrins`
---
.../LoongArch/LoongArchISelLowering.cpp | 79 +++++++++++++++++++
1 file changed, 79 insertions(+)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeafc9ccfc..94b2ad8b0eb32 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2319,6 +2319,82 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
}
+/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible). Accepts masks that equal V1 or V2 in place (entries of -1 are ignored) except at exactly two positions NumElts / 2 apart; returns an empty SDValue otherwise.
+static SDValue
+lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG,
+ const LoongArchSubtarget &Subtarget) {
+ int NumElts = VT.getVectorNumElements();
+ MVT EltVT = VT.getVectorElementType();
+ MVT GRLenVT = Subtarget.getGRLenVT();
+
+ if ((int)Mask.size() != NumElts)
+ return SDValue();
+
+ auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
+ SmallVector<int> DiffPos;
+ for (int i = 0; i < NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ if (Mask[i] != Base + i) {
+ DiffPos.push_back(i);
+ if (DiffPos.size() > 2)
+ return SDValue();
+ }
+ }
+
+ // Need exactly two differing elements, NumElts / 2 positions apart, to lower into XVEXTRINS.
+ if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + NumElts / 2)
+ return SDValue();
+
+ // The low differing mask value must index the low half of V1 or V2, the high one the high half of V1 or V2, and they must be NumElts / 2 apart (hence from the same source vector).
+ int DiffMaskLo = Mask[DiffPos[0]];
+ int DiffMaskHi = Mask[DiffPos[1]];
+ if (!(DiffMaskLo >= 0 && DiffMaskLo < NumElts / 2) &&
+ !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + NumElts / 2))
+ return SDValue();
+ if (!(DiffMaskHi >= NumElts / 2 && DiffMaskHi < NumElts) &&
+ !(DiffMaskHi >= NumElts + NumElts / 2 && DiffMaskHi < 2 * NumElts))
+ return SDValue();
+ if (DiffMaskHi != DiffMaskLo + NumElts / 2)
+ return SDValue();
+
+ // Determine the source vector and the low-half element index within that vector.
+ SDValue SrcVec = (DiffMaskLo < NumElts / 2) ? V1 : V2;
+ int SrcIdxLo =
+ (DiffMaskLo < NumElts / 2) ? DiffMaskLo : (DiffMaskLo - NumElts);
+ bool IsEltFP = EltVT.isFloatingPoint();
+
+ auto extractVal = [&](int Idx) -> SDValue {
+ SDValue Extracted =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
+ SrcVec, DAG.getConstant(Idx, DL, GRLenVT));
+ SDValue InsertVal = Extracted;
+ if (!IsEltFP && EltVT != GRLenVT)
+ InsertVal =
+ DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
+ DAG.getNode(ISD::TRUNCATE, DL, EltVT, Extracted));
+ return InsertVal;
+ };
+
+ // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT; this will match
+ // the patterns of XVEXTRINS in tablegen.
+ SDValue InsertLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+ (Base == 0) ? V1 : V2, extractVal(SrcIdxLo),
+ DAG.getConstant(DiffPos[0], DL, GRLenVT));
+ SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertLo,
+ extractVal(SrcIdxLo + NumElts / 2),
+ DAG.getConstant(DiffPos[1], DL, GRLenVT));
+
+ return Result;
+ };
+
+ // Try V1 as the insertion base (Base = 0, mask values [0, n)), then V2 (Base = n, mask values [n, 2n)).
+ if (SDValue Result = tryLowerToExtrAndIns(0))
+ return Result;
+ return tryLowerToExtrAndIns(NumElts);
+}
+
/// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
static SDValue
lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
@@ -2639,6 +2715,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return Result;
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
return Result;
+ if ((Result =
+ lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+ return Result;
if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
Zeroable)))
return Result;
>From 07debf07da0cb8bf3ad5c74ad21c4ac9246d7801 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 21 Oct 2025 16:16:41 +0800
Subject: [PATCH 2/2] update tests
---
.../ir-instruction/shuffle-as-xvextrins.ll | 22 ++++++-------------
.../ir-instruction/shuffle-as-xvshuf4i.ll | 5 +----
2 files changed, 8 insertions(+), 19 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
index 841f383e1bff8..a5b8a7a6e98dc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
@@ -8,9 +8,7 @@ define void @shufflevector_v32i8(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT: xvextrins.b $xr0, $xr1, 240
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -26,10 +24,8 @@ define void @shufflevector_v16i16(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK-LABEL: shufflevector_v16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT: xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT: xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT: xvst $xr1, $a0, 0
+; CHECK-NEXT: xvextrins.h $xr0, $xr0, 66
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <16 x i16>, ptr %a
@@ -45,10 +41,8 @@ define void @shufflevector_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvextrins.w $xr1, $xr0, 3
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x i32>, ptr %a
@@ -64,10 +58,8 @@ define void @shufflevector_v8f32(ptr %res, ptr %a, ptr %b) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
-; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: xvextrins.w $xr1, $xr0, 48
+; CHECK-NEXT: xvst $xr1, $a0, 0
; CHECK-NEXT: ret
entry:
%va = load <8 x float>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index 69437a24282b2..74976f9507525 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -59,10 +59,7 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT: xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT: xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT: xvori.b $xr0, $xr2, 0
+; CHECK-NEXT: xvextrins.d $xr0, $xr1, 0
; CHECK-NEXT: ret
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
ret <4 x double> %c
More information about the llvm-branch-commits
mailing list