[llvm-branch-commits] [llvm] [LoongArch] Select VSUBI for add with negative splat immediates (PR #191966)

Mon Apr 13 23:24:43 PDT 2026

https://github.com/heiher created https://github.com/llvm/llvm-project/pull/191966

Currently, vector add with a negative splat immediate is lowered as a vector splat followed by a register-register add, e.g.:

```
vrepli.b $vr1, -1
vadd.b   $vr0, $vr0, $vr1
```

This misses the opportunity to use the more efficient VSUBI instruction with a positive immediate.

This patch introduces `selectVSplatImmNeg` to detect negative splat immediates whose negated value fits in a 5-bit unsigned immediate. New patterns `(Pat{Vr,Xr}Nimm5)` are added to match:

```
add v, splat(-imm)  -->  vsubi v, v, imm
```

This avoids materializing the splat constant and reduces the instruction count.

```
add <16 x i8> %a, splat (i8 -1)
  =>
vsubi.bu $vr0, $vr0, 1
```

The transformation is applied for both LSX and LASX vector types.

>From 429f39f637616133e30b399a29cbaa924288bf18 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Tue, 14 Apr 2026 13:08:40 +0800
Subject: [PATCH] [LoongArch] Select VSUBI for add with negative splat
 immediates

Currently, vector add with a negative splat immediate is lowered as a
vector splat followed by a register-register add, e.g.:

```
vrepli.b $vr1, -1
vadd.b   $vr0, $vr0, $vr1
```

This misses the opportunity to use the more efficient VSUBI instruction
with a positive immediate.

This patch introduces `selectVSplatImmNeg` to detect negative splat
immediates whose negated value fits in a 5-bit unsigned immediate. New
patterns `(Pat{Vr,Xr}Nimm5)` are added to match:

```
add v, splat(-imm)  -->  vsubi v, v, imm
```

This avoids materializing the splat constant and reduces the instruction
count.

```
add <16 x i8> %a, splat (i8 -1)
  =>
vsubi.bu $vr0, $vr0, 1
```

The transformation is applied for both LSX and LASX vector types.
---
 .../LoongArch/LoongArchISelDAGToDAG.cpp       | 22 +++++++++++++++++++
 .../Target/LoongArch/LoongArchISelDAGToDAG.h  |  2 ++
 .../LoongArch/LoongArchLASXInstrInfo.td       | 12 ++++++++++
 .../Target/LoongArch/LoongArchLSXInstrInfo.td | 16 ++++++++++++++
 .../LoongArch/lasx/ir-instruction/sub.ll      | 15 +++++--------
 .../LoongArch/lsx/ir-instruction/sub.ll       | 15 +++++--------
 6 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 442f0a46a4983..7ef857970117c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -425,6 +425,28 @@ bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
   return false;
 }
 
+template <unsigned ImmBitSize>
+bool LoongArchDAGToDAGISel::selectVSplatImmNeg(SDValue N,
+                                               SDValue &SplatVal) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int64_t NegImm = -ImmValue.getSExtValue();
+    if (isUInt<5>(NegImm)) {
+      SplatVal = CurDAG->getSignedTargetConstant(NegImm, SDLoc(N),
+                                                 Subtarget->getGRLenVT());
+      return true;
+    }
+  }
+
+  return false;
+}
+
 bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
                                                     SDValue &SplatImm) const {
   APInt ImmValue;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 4c8dcb8fa48af..cd78b20de0df2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -62,6 +62,8 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
 
   template <unsigned ImmSize, bool IsSigned = false>
   bool selectVSplatImm(SDValue N, SDValue &SplatVal);
+  template <unsigned ImmSize>
+  bool selectVSplatImmNeg(SDValue N, SDValue &SplatVal) const;
 
   bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
   bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 1b18d16a4cabb..55865539c9048 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1186,6 +1186,17 @@ multiclass PatXrUimm5<SDPatternOperator OpNode, string Inst> {
             (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
 }
 
+multiclass PatXrNimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
 multiclass PatXrXrXr<SDPatternOperator OpNode, string Inst> {
   def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj),
                     (v32i8 LASX256:$xk)),
@@ -1344,6 +1355,7 @@ defm : PatXrXr<sub, "XVSUB">;
 // XVADDI_{B/H/W/D}U
 defm : PatXrUimm5<add, "XVADDI">;
 // XVSUBI_{B/H/W/D}U
+defm : PatXrNimm5<add, "XVSUBI">;
 defm : PatXrUimm5<sub, "XVSUBI">;
 
 // XVNEG_{B/H/W/D}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index dbc0d68f2254f..0547c2f16f0fe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -203,6 +203,10 @@ foreach N = [5] in
   def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", true>",
                                        [build_vector, bitconvert]>;
 
+foreach N = [5] in
+  def SplatPat_nimm#N : ComplexPattern<vAny, 1, "selectVSplatImmNeg<"#N#">",
+                                       [build_vector, bitconvert]>;
+
 def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
                                           [build_vector, bitconvert]>;
 
@@ -1370,6 +1374,17 @@ multiclass PatVrUimm5<SDPatternOperator OpNode, string Inst> {
             (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
 }
 
+multiclass PatVrNimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+  def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_nimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
 multiclass PatVrVrVr<SDPatternOperator OpNode, string Inst> {
   def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
             (!cast<LAInst>(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
@@ -1554,6 +1569,7 @@ defm : PatVrVr<sub, "VSUB">;
 // VADDI_{B/H/W/D}U
 defm : PatVrUimm5<add, "VADDI">;
 // VSUBI_{B/H/W/D}U
+defm : PatVrNimm5<add, "VSUBI">;
 defm : PatVrUimm5<sub, "VSUBI">;
 
 // VNEG_{B/H/W/D}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
index b598b1b0324ad..9d294e80b02c9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
@@ -125,8 +125,7 @@ entry:
 define <32 x i8> @add_v32i8_n1(<32 x i8> %a) nounwind {
 ; CHECK-LABEL: add_v32i8_n1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsubi.bu $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <32 x i8> %a, splat (i8 -1)
@@ -136,8 +135,7 @@ entry:
 define <32 x i8> @add_v32i8_n31(<32 x i8> %a) nounwind {
 ; CHECK-LABEL: add_v32i8_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, -31
-; CHECK-NEXT:    xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsubi.bu $xr0, $xr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <32 x i8> %a, splat (i8 -31)
@@ -158,8 +156,7 @@ entry:
 define <16 x i16> @add_v16i16_n31(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: add_v16i16_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.h $xr1, -31
-; CHECK-NEXT:    xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsubi.hu $xr0, $xr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <16 x i16> %a, splat (i16 -31)
@@ -169,8 +166,7 @@ entry:
 define <8 x i32> @add_v8i32_n31(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: add_v8i32_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.w $xr1, -31
-; CHECK-NEXT:    xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsubi.wu $xr0, $xr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <8 x i32> %a, splat (i32 -31)
@@ -180,8 +176,7 @@ entry:
 define <4 x i64> @add_v4i64_n31(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: add_v4i64_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.d $xr1, -31
-; CHECK-NEXT:    xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvsubi.du $xr0, $xr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <4 x i64> %a, splat (i64 -31)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
index 52108473e2c85..ee4b4c4a39211 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -125,8 +125,7 @@ entry:
 define <16 x i8> @add_v16i8_n1(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: add_v16i8_n1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vsubi.bu $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <16 x i8> %a, splat (i8 -1)
@@ -136,8 +135,7 @@ entry:
 define <16 x i8> @add_v16i8_n31(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: add_v16i8_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, -31
-; CHECK-NEXT:    vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    vsubi.bu $vr0, $vr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <16 x i8> %a, splat (i8 -31)
@@ -158,8 +156,7 @@ entry:
 define <8 x i16> @add_v8i16_n31(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: add_v8i16_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.h $vr1, -31
-; CHECK-NEXT:    vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    vsubi.hu $vr0, $vr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <8 x i16> %a, splat (i16 -31)
@@ -169,8 +166,7 @@ entry:
 define <4 x i32> @add_v4i32_n31(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: add_v4i32_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.w $vr1, -31
-; CHECK-NEXT:    vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    vsubi.wu $vr0, $vr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <4 x i32> %a, splat (i32 -31)
@@ -180,8 +176,7 @@ entry:
 define <2 x i64> @add_v2i64_n31(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: add_v2i64_n31:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.d $vr1, -31
-; CHECK-NEXT:    vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    vsubi.du $vr0, $vr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %0 = add <2 x i64> %a, splat (i64 -31)