[llvm-branch-commits] [llvm] [LoongArch] Select VSUBI for add with negative splat immediates (PR #191966)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 13 23:24:43 PDT 2026
https://github.com/heiher created https://github.com/llvm/llvm-project/pull/191966
Currently, vector add with a negative splat immediate is lowered as a vector splat followed by a register-register add, e.g.:
```
vrepli.b $vr1, -1
vadd.b $vr0, $vr0, $vr1
```
This misses the opportunity to use the more efficient VSUBI instruction with a positive immediate.
This patch introduces `selectVSplatImmNeg` to detect negative splat immediates whose negated value fits in a 5-bit unsigned immediate. New patterns `(Pat{Vr,Xr}Nimm5)` are added to match:
```
add v, splat(-imm) --> vsubi v, v, imm
```
This avoids materializing the splat constant and reduces the instruction count.
```
add <16 x i8> %a, splat (i8 -1)
=>
vsubi.bu $vr0, $vr0, 1
```
The transformation is applied for both LSX and LASX vector types.
>From 429f39f637616133e30b399a29cbaa924288bf18 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Tue, 14 Apr 2026 13:08:40 +0800
Subject: [PATCH] [LoongArch] Select VSUBI for add with negative splat
immediates
Currently, vector add with a negative splat immediate is lowered as a
vector splat followed by a register-register add, e.g.:
```
vrepli.b $vr1, -1
vadd.b $vr0, $vr0, $vr1
```
This misses the opportunity to use the more efficient VSUBI instruction
with a positive immediate.
This patch introduces `selectVSplatImmNeg` to detect negative splat
immediates whose negated value fits in a 5-bit unsigned immediate. New
patterns `(Pat{Vr,Xr}Nimm5)` are added to match:
```
add v, splat(-imm) --> vsubi v, v, imm
```
This avoids materializing the splat constant and reduces the instruction
count.
```
add <16 x i8> %a, splat (i8 -1)
=>
vsubi.bu $vr0, $vr0, 1
```
The transformation is applied for both LSX and LASX vector types.
---
.../LoongArch/LoongArchISelDAGToDAG.cpp | 22 +++++++++++++++++++
.../Target/LoongArch/LoongArchISelDAGToDAG.h | 2 ++
.../LoongArch/LoongArchLASXInstrInfo.td | 12 ++++++++++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 16 ++++++++++++++
.../LoongArch/lasx/ir-instruction/sub.ll | 15 +++++--------
.../LoongArch/lsx/ir-instruction/sub.ll | 15 +++++--------
6 files changed, 62 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 442f0a46a4983..7ef857970117c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -425,6 +425,28 @@ bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
return false;
}
+/// Match a constant vector splat whose negated element value fits in an
+/// unsigned ImmBitSize-bit immediate, so that `add v, splat(-imm)` can be
+/// selected as a subtract-immediate.
+///
+/// \param N        Candidate splat operand; a single BITCAST is looked through.
+/// \param SplatVal On success, set to a GRLen-typed target constant holding
+///                 the negated splat value.
+/// \returns true if \p N matched and \p SplatVal was set, false otherwise.
+template <unsigned ImmBitSize>
+bool LoongArchDAGToDAGISel::selectVSplatImmNeg(SDValue N,
+                                               SDValue &SplatVal) const {
+  APInt ImmValue;
+  // The element type is taken from the outer node, before any bitcast is
+  // stripped, so the splat is checked at the width the user operates on.
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int64_t NegImm = -ImmValue.getSExtValue();
+    // Range-check against the template width rather than a hard-coded 5 so
+    // that every instantiation honours its own immediate size.
+    if (isUInt<ImmBitSize>(NegImm)) {
+      SplatVal = CurDAG->getSignedTargetConstant(NegImm, SDLoc(N),
+                                                 Subtarget->getGRLenVT());
+      return true;
+    }
+  }
+
+  return false;
+}
+
bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
SDValue &SplatImm) const {
APInt ImmValue;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 4c8dcb8fa48af..cd78b20de0df2 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -62,6 +62,8 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
template <unsigned ImmSize, bool IsSigned = false>
bool selectVSplatImm(SDValue N, SDValue &SplatVal);
+ template <unsigned ImmSize>
+ bool selectVSplatImmNeg(SDValue N, SDValue &SplatVal) const;
bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 1b18d16a4cabb..55865539c9048 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1186,6 +1186,17 @@ multiclass PatXrUimm5<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
}
+multiclass PatXrNimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+ def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
multiclass PatXrXrXr<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj),
(v32i8 LASX256:$xk)),
@@ -1344,6 +1355,7 @@ defm : PatXrXr<sub, "XVSUB">;
// XVADDI_{B/H/W/D}U
defm : PatXrUimm5<add, "XVADDI">;
// XVSUBI_{B/H/W/D}U
+defm : PatXrNimm5<add, "XVSUBI">;
defm : PatXrUimm5<sub, "XVSUBI">;
// XVNEG_{B/H/W/D}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index dbc0d68f2254f..0547c2f16f0fe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -203,6 +203,10 @@ foreach N = [5] in
def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", true>",
[build_vector, bitconvert]>;
+foreach N = [5] in
+ def SplatPat_nimm#N : ComplexPattern<vAny, 1, "selectVSplatImmNeg<"#N#">",
+ [build_vector, bitconvert]>;
+
def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
[build_vector, bitconvert]>;
@@ -1370,6 +1374,17 @@ multiclass PatVrUimm5<SDPatternOperator OpNode, string Inst> {
(!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
}
+multiclass PatVrNimm5<SDPatternOperator OpNode, string Inst> {
+ def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_BU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_HU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_WU") LSX128:$vj, uimm5:$imm)>;
+ def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_nimm5 uimm5:$imm))),
+ (!cast<LAInst>(Inst#"_DU") LSX128:$vj, uimm5:$imm)>;
+}
+
multiclass PatVrVrVr<SDPatternOperator OpNode, string Inst> {
def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)),
(!cast<LAInst>(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>;
@@ -1554,6 +1569,7 @@ defm : PatVrVr<sub, "VSUB">;
// VADDI_{B/H/W/D}U
defm : PatVrUimm5<add, "VADDI">;
// VSUBI_{B/H/W/D}U
+defm : PatVrNimm5<add, "VSUBI">;
defm : PatVrUimm5<sub, "VSUBI">;
// VNEG_{B/H/W/D}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
index b598b1b0324ad..9d294e80b02c9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll
@@ -125,8 +125,7 @@ entry:
define <32 x i8> @add_v32i8_n1(<32 x i8> %a) nounwind {
; CHECK-LABEL: add_v32i8_n1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = add <32 x i8> %a, splat (i8 -1)
@@ -136,8 +135,7 @@ entry:
define <32 x i8> @add_v32i8_n31(<32 x i8> %a) nounwind {
; CHECK-LABEL: add_v32i8_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, -31
-; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <32 x i8> %a, splat (i8 -31)
@@ -158,8 +156,7 @@ entry:
define <16 x i16> @add_v16i16_n31(<16 x i16> %a) nounwind {
; CHECK-LABEL: add_v16i16_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, -31
-; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <16 x i16> %a, splat (i16 -31)
@@ -169,8 +166,7 @@ entry:
define <8 x i32> @add_v8i32_n31(<8 x i32> %a) nounwind {
; CHECK-LABEL: add_v8i32_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.w $xr1, -31
-; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <8 x i32> %a, splat (i32 -31)
@@ -180,8 +176,7 @@ entry:
define <4 x i64> @add_v4i64_n31(<4 x i64> %a) nounwind {
; CHECK-LABEL: add_v4i64_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.d $xr1, -31
-; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <4 x i64> %a, splat (i64 -31)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
index 52108473e2c85..ee4b4c4a39211 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -125,8 +125,7 @@ entry:
define <16 x i8> @add_v16i8_n1(<16 x i8> %a) nounwind {
; CHECK-LABEL: add_v16i8_n1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = add <16 x i8> %a, splat (i8 -1)
@@ -136,8 +135,7 @@ entry:
define <16 x i8> @add_v16i8_n31(<16 x i8> %a) nounwind {
; CHECK-LABEL: add_v16i8_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, -31
-; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1
+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <16 x i8> %a, splat (i8 -31)
@@ -158,8 +156,7 @@ entry:
define <8 x i16> @add_v8i16_n31(<8 x i16> %a) nounwind {
; CHECK-LABEL: add_v8i16_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, -31
-; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1
+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <8 x i16> %a, splat (i16 -31)
@@ -169,8 +166,7 @@ entry:
define <4 x i32> @add_v4i32_n31(<4 x i32> %a) nounwind {
; CHECK-LABEL: add_v4i32_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.w $vr1, -31
-; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1
+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <4 x i32> %a, splat (i32 -31)
@@ -180,8 +176,7 @@ entry:
define <2 x i64> @add_v2i64_n31(<2 x i64> %a) nounwind {
; CHECK-LABEL: add_v2i64_n31:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.d $vr1, -31
-; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1
+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31
; CHECK-NEXT: ret
entry:
%0 = add <2 x i64> %a, splat (i64 -31)
More information about the llvm-branch-commits
mailing list