[llvm] [RISCV] Add scalar saturating add/sub operations for i32 for RV64P (PR #184062)
Qihan Cai via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 2 15:49:22 PST 2026
https://github.com/realqhc updated https://github.com/llvm/llvm-project/pull/184062
From 06e874382ae2d470dfea9b10d6a9279d6f31bc67 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Mon, 2 Mar 2026 15:51:30 +1100
Subject: [PATCH 1/3] [RISCV] Add scalar saturating add/sub operations for i32
for RV64 P Extension
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 +++++++-
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 57 ++++++++++++++++++---
2 files changed, 70 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a8542be937a87..d3837e3b22c3d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -478,10 +478,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if ((Subtarget.hasStdExtP() || Subtarget.hasVendorXqcia()) &&
!Subtarget.is64Bit()) {
- // FIXME: Support i32 on RV64+P by inserting into a v2i32 vector, doing
- // the vector operation and extracting.
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT, ISD::UADDSAT, ISD::USUBSAT},
MVT::i32, Legal);
+ } else if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) {
+ setOperationAction({ISD::SADDSAT, ISD::SSUBSAT, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i32, Custom);
} else if (!Subtarget.hasStdExtZbb() && Subtarget.is64Bit()) {
setOperationAction({ISD::SADDSAT, ISD::SSUBSAT, ISD::UADDSAT, ISD::USUBSAT},
MVT::i32, Custom);
@@ -15587,6 +15588,22 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SSUBSAT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
+
+ if (Subtarget.hasStdExtP()) {
+ // On RV64, map scalar i32 saturating add/sub through lane 0 of a packed
+ // v2i32 operation so we can select ps*.w instructions.
+ SDValue LHS =
+ DAG.getBitcast(MVT::v2i32, DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+ N->getOperand(0)));
+ SDValue RHS =
+ DAG.getBitcast(MVT::v2i32, DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
+ N->getOperand(1)));
+ SDValue VecRes = DAG.getNode(N->getOpcode(), DL, MVT::v2i32, LHS, RHS);
+ SDValue Res64 = DAG.getBitcast(MVT::i64, VecRes);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res64));
+ return;
+ }
+
Results.push_back(expandAddSubSat(N, DAG));
return;
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 52e0a0afb72d5..6c488cf94060a 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -666,6 +666,51 @@ define <2 x i32> @test_pssubu_w(<2 x i32> %a, <2 x i32> %b) {
ret <2 x i32> %res
}
+; Test scalar saturating add/sub operations for i32 (RV64 only)
+define i32 @test_scalar_psadd_w(i32 %a, i32 %b) {
+; CHECK-LABEL: test_scalar_psadd_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.w a1, a1
+; CHECK-NEXT: zext.w a0, a0
+; CHECK-NEXT: psadd.w a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
+ ret i32 %res
+}
+
+define i32 @test_scalar_psaddu_w(i32 %a, i32 %b) {
+; CHECK-LABEL: test_scalar_psaddu_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.w a1, a1
+; CHECK-NEXT: zext.w a0, a0
+; CHECK-NEXT: psaddu.w a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
+ ret i32 %res
+}
+
+define i32 @test_scalar_pssub_w(i32 %a, i32 %b) {
+; CHECK-LABEL: test_scalar_pssub_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.w a1, a1
+; CHECK-NEXT: zext.w a0, a0
+; CHECK-NEXT: pssub.w a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call i32 @llvm.ssub.sat.i32(i32 %a, i32 %b)
+ ret i32 %res
+}
+
+define i32 @test_scalar_pssubu_w(i32 %a, i32 %b) {
+; CHECK-LABEL: test_scalar_pssubu_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: zext.w a1, a1
+; CHECK-NEXT: zext.w a0, a0
+; CHECK-NEXT: pssubu.w a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
+ ret i32 %res
+}
+
; Test averaging floor signed operations for v2i32 (RV64 only)
; avgfloors pattern: (a + b) arithmetic shift right 1
define <2 x i32> @test_paadd_w(<2 x i32> %a, <2 x i32> %b) {
@@ -2005,10 +2050,10 @@ define <4 x i16> @test_select_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a0, 1
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: bnez a3, .LBB161_2
+; CHECK-NEXT: bnez a3, .LBB165_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: .LBB161_2:
+; CHECK-NEXT: .LBB165_2:
; CHECK-NEXT: ret
%res = select i1 %cond, <4 x i16> %a, <4 x i16> %b
ret <4 x i16> %res
@@ -2019,10 +2064,10 @@ define <8 x i8> @test_select_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a0, 1
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: bnez a3, .LBB162_2
+; CHECK-NEXT: bnez a3, .LBB166_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: .LBB162_2:
+; CHECK-NEXT: .LBB166_2:
; CHECK-NEXT: ret
%res = select i1 %cond, <8 x i8> %a, <8 x i8> %b
ret <8 x i8> %res
@@ -2033,10 +2078,10 @@ define <2 x i32> @test_select_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: andi a3, a0, 1
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: bnez a3, .LBB163_2
+; CHECK-NEXT: bnez a3, .LBB167_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: .LBB163_2:
+; CHECK-NEXT: .LBB167_2:
; CHECK-NEXT: ret
%res = select i1 %cond, <2 x i32> %a, <2 x i32> %b
ret <2 x i32> %res
From f3d4bff2d6eb1825da432c00cad26bb43ad0e526 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Mon, 2 Mar 2026 17:23:16 +1100
Subject: [PATCH 2/3] fixup: correctly handle lowering for UADDSAT/USUBSAT
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d3837e3b22c3d..691e1d120bf81 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15576,14 +15576,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UADDSAT:
- case ISD::USUBSAT: {
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- !Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
- // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
- // promotion for UADDO/USUBO.
- Results.push_back(expandAddSubSat(N, DAG));
- return;
- }
+ case ISD::USUBSAT:
case ISD::SADDSAT:
case ISD::SSUBSAT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
@@ -15604,6 +15597,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ assert(!Subtarget.hasStdExtZbb() && "Unexpected custom legalisation");
Results.push_back(expandAddSubSat(N, DAG));
return;
}
From 70247ede17f210dea3fdd6f25ed9d9184c3ccbff Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Tue, 3 Mar 2026 10:49:07 +1100
Subject: [PATCH 3/3] use SCALAR_TO_VECTOR and EXTRACT_VECTOR_ELT to avoid
zext.w generation
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 17 +++++++++--------
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 8 --------
2 files changed, 9 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 691e1d120bf81..83f1b8788a145 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15585,15 +15585,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasStdExtP()) {
// On RV64, map scalar i32 saturating add/sub through lane 0 of a packed
// v2i32 operation so we can select ps*.w instructions.
- SDValue LHS =
- DAG.getBitcast(MVT::v2i32, DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
- N->getOperand(0)));
- SDValue RHS =
- DAG.getBitcast(MVT::v2i32, DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64,
- N->getOperand(1)));
+ SDValue LHS = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)));
+ SDValue RHS = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, MVT::v2i32,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)));
SDValue VecRes = DAG.getNode(N->getOpcode(), DL, MVT::v2i32, LHS, RHS);
- SDValue Res64 = DAG.getBitcast(MVT::i64, VecRes);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res64));
+ SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+ Results.push_back(
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, VecRes, Zero));
return;
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 6c488cf94060a..3b7be394fd668 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -670,8 +670,6 @@ define <2 x i32> @test_pssubu_w(<2 x i32> %a, <2 x i32> %b) {
define i32 @test_scalar_psadd_w(i32 %a, i32 %b) {
; CHECK-LABEL: test_scalar_psadd_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.w a1, a1
-; CHECK-NEXT: zext.w a0, a0
; CHECK-NEXT: psadd.w a0, a0, a1
; CHECK-NEXT: ret
%res = call i32 @llvm.sadd.sat.i32(i32 %a, i32 %b)
@@ -681,8 +679,6 @@ define i32 @test_scalar_psadd_w(i32 %a, i32 %b) {
define i32 @test_scalar_psaddu_w(i32 %a, i32 %b) {
; CHECK-LABEL: test_scalar_psaddu_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.w a1, a1
-; CHECK-NEXT: zext.w a0, a0
; CHECK-NEXT: psaddu.w a0, a0, a1
; CHECK-NEXT: ret
%res = call i32 @llvm.uadd.sat.i32(i32 %a, i32 %b)
@@ -692,8 +688,6 @@ define i32 @test_scalar_psaddu_w(i32 %a, i32 %b) {
define i32 @test_scalar_pssub_w(i32 %a, i32 %b) {
; CHECK-LABEL: test_scalar_pssub_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.w a1, a1
-; CHECK-NEXT: zext.w a0, a0
; CHECK-NEXT: pssub.w a0, a0, a1
; CHECK-NEXT: ret
%res = call i32 @llvm.ssub.sat.i32(i32 %a, i32 %b)
@@ -703,8 +697,6 @@ define i32 @test_scalar_pssub_w(i32 %a, i32 %b) {
define i32 @test_scalar_pssubu_w(i32 %a, i32 %b) {
; CHECK-LABEL: test_scalar_pssubu_w:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.w a1, a1
-; CHECK-NEXT: zext.w a0, a0
; CHECK-NEXT: pssubu.w a0, a0, a1
; CHECK-NEXT: ret
%res = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
More information about the llvm-commits
mailing list