[llvm-branch-commits] [llvm] [LoongArch] Custom legalize vector_shuffle to `xvextrins` (PR #164375)

Tue Oct 21 01:19:19 PDT 2025

https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/164375

None

>From db2859536c8496ef7225cb9eeb7310d13d4c008c Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 21 Oct 2025 16:07:29 +0800
Subject: [PATCH 1/2] [LoongArch] Custom legalize vector_shuffle to `xvextrins`

---
 .../LoongArch/LoongArchISelLowering.cpp       | 79 +++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index f7deeafc9ccfc..94b2ad8b0eb32 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2319,6 +2319,82 @@ static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
   return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
 }
 
+/// Lower VECTOR_SHUFFLE into XVEXTRINS (if possible).
+static SDValue
+lowerVECTOR_SHUFFLE_XVEXTRINS(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                              SDValue V1, SDValue V2, SelectionDAG &DAG,
+                              const LoongArchSubtarget &Subtarget) {
+  int NumElts = VT.getVectorNumElements();
+  MVT EltVT = VT.getVectorElementType();
+  MVT GRLenVT = Subtarget.getGRLenVT();
+
+  if ((int)Mask.size() != NumElts)
+    return SDValue();
+
+  auto tryLowerToExtrAndIns = [&](int Base) -> SDValue {
+    SmallVector<int> DiffPos;
+    for (int i = 0; i < NumElts; ++i) {
+      if (Mask[i] == -1)
+        continue;
+      if (Mask[i] != Base + i) {
+        DiffPos.push_back(i);
+        if (DiffPos.size() > 2)
+          return SDValue();
+      }
+    }
+
+    // Need exactly two differing element to lower into XVEXTRINS.
+    if (DiffPos.size() != 2 || DiffPos[1] != DiffPos[0] + NumElts / 2)
+      return SDValue();
+
+    // DiffMask must be in its low or high part.
+    int DiffMaskLo = Mask[DiffPos[0]];
+    int DiffMaskHi = Mask[DiffPos[1]];
+    if (!(DiffMaskLo >= 0 && DiffMaskLo < NumElts / 2) &&
+        !(DiffMaskLo >= NumElts && DiffMaskLo < NumElts + NumElts / 2))
+      return SDValue();
+    if (!(DiffMaskHi >= NumElts / 2 && DiffMaskHi < NumElts) &&
+        !(DiffMaskHi >= NumElts + NumElts / 2 && DiffMaskHi < 2 * NumElts))
+      return SDValue();
+    if (DiffMaskHi != DiffMaskLo + NumElts / 2)
+      return SDValue();
+
+    // Determine source vector and source index.
+    SDValue SrcVec = (DiffMaskLo < NumElts / 2) ? V1 : V2;
+    int SrcIdxLo =
+        (DiffMaskLo < NumElts / 2) ? DiffMaskLo : (DiffMaskLo - NumElts);
+    bool IsEltFP = EltVT.isFloatingPoint();
+
+    auto extractVal = [&](int Idx) -> SDValue {
+      SDValue Extracted =
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IsEltFP ? EltVT : GRLenVT,
+                      SrcVec, DAG.getConstant(Idx, DL, GRLenVT));
+      SDValue InsertVal = Extracted;
+      if (!IsEltFP && EltVT != GRLenVT)
+        InsertVal =
+            DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
+                        DAG.getNode(ISD::TRUNCATE, DL, EltVT, Extracted));
+      return InsertVal;
+    };
+
+    // Replace with 2*EXTRACT_VECTOR_ELT + 2*INSERT_VECTOR_ELT, it will match
+    // the patterns of XVEXTRINS in tablegen.
+    SDValue InsertLo = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
+                                   (Base == 0) ? V1 : V2, extractVal(SrcIdxLo),
+                                   DAG.getConstant(DiffPos[0], DL, GRLenVT));
+    SDValue Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertLo,
+                                 extractVal(SrcIdxLo + NumElts / 2),
+                                 DAG.getConstant(DiffPos[1], DL, GRLenVT));
+
+    return Result;
+  };
+
+  // Try [0, n-1) insertion then [n, 2n-1) insertion.
+  if (SDValue Result = tryLowerToExtrAndIns(0))
+    return Result;
+  return tryLowerToExtrAndIns(NumElts);
+}
+
 /// Lower VECTOR_SHUFFLE into XVINSVE0 (if possible).
 static SDValue
 lowerVECTOR_SHUFFLE_XVINSVE0(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
@@ -2639,6 +2715,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
     return Result;
   if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, Mask, VT, V1, V2, DAG)))
     return Result;
+  if ((Result =
+           lowerVECTOR_SHUFFLE_XVEXTRINS(DL, Mask, VT, V1, V2, DAG, Subtarget)))
+    return Result;
   if ((Result = lowerVECTOR_SHUFFLEAsShift(DL, Mask, VT, V1, V2, DAG, Subtarget,
                                            Zeroable)))
     return Result;

>From 07debf07da0cb8bf3ad5c74ad21c4ac9246d7801 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 21 Oct 2025 16:16:41 +0800
Subject: [PATCH 2/2] update tests

---
 .../ir-instruction/shuffle-as-xvextrins.ll    | 22 ++++++-------------
 .../ir-instruction/shuffle-as-xvshuf4i.ll     |  5 +----
 2 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
index 841f383e1bff8..a5b8a7a6e98dc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvextrins.ll
@@ -8,9 +8,7 @@ define void @shufflevector_v32i8(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI0_0)
-; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI0_0)
-; CHECK-NEXT:    xvshuf.b $xr0, $xr1, $xr0, $xr2
+; CHECK-NEXT:    xvextrins.b $xr0, $xr1, 240
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -26,10 +24,8 @@ define void @shufflevector_v16i16(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK-LABEL: shufflevector_v16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI1_0)
-; CHECK-NEXT:    xvld $xr1, $a1, %pc_lo12(.LCPI1_0)
-; CHECK-NEXT:    xvshuf.h $xr1, $xr0, $xr0
-; CHECK-NEXT:    xvst $xr1, $a0, 0
+; CHECK-NEXT:    xvextrins.h $xr0, $xr0, 66
+; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <16 x i16>, ptr %a
@@ -45,10 +41,8 @@ define void @shufflevector_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI2_0)
-; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI2_0)
-; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvst $xr2, $a0, 0
+; CHECK-NEXT:    xvextrins.w $xr1, $xr0, 3
+; CHECK-NEXT:    xvst $xr1, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <8 x i32>, ptr %a
@@ -64,10 +58,8 @@ define void @shufflevector_v8f32(ptr %res, ptr %a, ptr %b) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
-; CHECK-NEXT:    pcalau12i $a1, %pc_hi20(.LCPI3_0)
-; CHECK-NEXT:    xvld $xr2, $a1, %pc_lo12(.LCPI3_0)
-; CHECK-NEXT:    xvshuf.w $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvst $xr2, $a0, 0
+; CHECK-NEXT:    xvextrins.w $xr1, $xr0, 48
+; CHECK-NEXT:    xvst $xr1, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
   %va = load <8 x float>, ptr %a
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
index 69437a24282b2..74976f9507525 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll
@@ -59,10 +59,7 @@ define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b)
 define <4 x double> @shufflevector_xvshuf4i_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_xvshuf4i_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcalau12i $a0, %pc_hi20(.LCPI5_0)
-; CHECK-NEXT:    xvld $xr2, $a0, %pc_lo12(.LCPI5_0)
-; CHECK-NEXT:    xvshuf.d $xr2, $xr1, $xr0
-; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
+; CHECK-NEXT:    xvextrins.d $xr0, $xr1, 0
 ; CHECK-NEXT:    ret
     %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
     ret <4 x double> %c