[llvm] f090924 - [RISCV] Custom legalize i32 SADDO/SSUBO with RV64LegalI32.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 3 13:15:05 PST 2024
Author: Craig Topper
Date: 2024-02-03T13:07:08-08:00
New Revision: f09092434423be14f32781d8ae263dc041d24551
URL: https://github.com/llvm/llvm-project/commit/f09092434423be14f32781d8ae263dc041d24551
DIFF: https://github.com/llvm/llvm-project/commit/f09092434423be14f32781d8ae263dc041d24551.diff
LOG: [RISCV] Custom legalize i32 SADDO/SSUBO with RV64LegalI32.
The default legalization uses 2 compares and an xor. We can instead
use add+addw+xor+snez like we do without RV64LegalI32.
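
For readers who want the intuition: a 32-bit signed add overflows exactly
when the full 64-bit sum of the sign-extended operands disagrees with the
sign-extended 32-bit (addw-style) result. A minimal standalone sketch of
that equivalence in C++ (illustrative only, not LLVM code; the function
name is invented):

#include <cstdint>

// i32 signed-add overflow test, structured the way this patch lowers it:
// one full-width add, one narrowing add, then compare the two results.
bool saddOverflows(int32_t A, int32_t B) {
  int64_t Wide = int64_t(A) + int64_t(B);   // RISC-V: add
  int32_t Narrow = int32_t(uint32_t(Wide)); // RISC-V: addw (wraps mod 2^32)
  return Wide != int64_t(Narrow);           // RISC-V: xor + snez
}

The same reasoning applies to SSUBO with sub/subw in place of add/addw.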
Added: 

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll

Removed: 

################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 212ea7345e05d..fead2aebb5830 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -278,10 +278,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::LOAD, MVT::i32, Custom);
       setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                          MVT::i32, Custom);
-      setOperationAction(ISD::SADDO, MVT::i32, Custom);
       setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                          MVT::i32, Custom);
-    }
+    } else
+      setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
   } else {
     setLibcallName(
         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -5354,6 +5355,26 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   return Op;
 }
 
+// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
+static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
+         "Unexpected custom legalisation");
+  if (isa<ConstantSDNode>(Op.getOperand(1)))
+    return SDValue();
+
+  bool IsAdd = Op.getOpcode() == ISD::SADDO;
+  SDLoc DL(Op);
+  SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
+  SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
+  SDValue WideOp = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
+  SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
+  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
+                             DAG.getValueType(MVT::i32));
+  SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
+                             ISD::SETNE);
+  return DAG.getMergeValues({Res, Ovf}, DL);
+}
+
// Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
@@ -5873,6 +5894,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::SADDO:
+  case ISD::SSUBO:
+    return lowerSADDO_SSUBO(Op, DAG);
   case ISD::SMULO:
     return lowerSMULO(Op, DAG);
   case ISD::SHL_PARTS:
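
A note on the constant-operand bail-out in lowerSADDO_SSUBO above: returning
an empty SDValue from a Custom lowering lets LegalizeDAG fall back to the
default expansion, and for a constant RHS that generic compare-based form is
presumably at least as good, since overflow against a known constant needs
only a single comparison. A self-contained C++ illustration of that folding
(my own example, not code from the patch):

#include <cstdint>
#include <limits>

// With a known positive constant C, "A + C overflows" reduces to a single
// comparison: it overflows iff A > INT32_MAX - C. (For negative C the
// symmetric check against INT32_MIN - C applies; C == 0 never overflows.)
bool saddOverflowsConst(int32_t A) {
  constexpr int32_t C = 42; // stand-in for the constant operand
  return A > std::numeric_limits<int32_t>::max() - C;
}
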
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
index aa3d9bf4134bc..a58fd6f785411 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -9,10 +9,10 @@ define zeroext i1 @saddo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
 ; RV64-LABEL: saddo1.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a3, a0, a1
-; RV64-NEXT:    slt a0, a3, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    sw a3, 0(a2)
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    xor a3, a1, a3
+; RV64-NEXT:    snez a0, a3
+; RV64-NEXT:    sw a1, 0(a2)
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -199,10 +199,10 @@ entry:
 define zeroext i1 @ssubo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
 ; RV64-LABEL: ssubo1.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a3, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    xor a3, a1, a3
+; RV64-NEXT:    snez a0, a3
 ; RV64-NEXT:    sw a1, 0(a2)
 ; RV64-NEXT:    ret
 entry:
@@ -479,8 +479,7 @@ define i32 @saddo.select.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.select.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a2, a2, a0
-; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    add a3, a0, a1
 ; RV64-NEXT:    bne a3, a2, .LBB28_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
@@ -497,9 +496,9 @@ define i1 @saddo.not.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.not.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a0, a2, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    xor a0, a0, a2
+; RV64-NEXT:    snez a0, a0
 ; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
@@ -606,10 +605,9 @@ entry:
 define i32 @ssubo.select.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a3, a0, a1
-; RV64-NEXT:    slt a3, a3, a0
-; RV64-NEXT:    bne a2, a3, .LBB36_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    bne a3, a2, .LBB36_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB36_2: # %entry
@@ -624,10 +622,10 @@ entry:
 define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    xor a0, a0, a2
+; RV64-NEXT:    snez a0, a0
 ; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
@@ -873,9 +871,8 @@ define zeroext i1 @saddo.br.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.br.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a0, a2, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    beq a0, a2, .LBB52_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret
@@ -973,10 +970,9 @@ continue:
 define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    beq a2, a0, .LBB56_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    beq a0, a2, .LBB56_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret
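
As a quick way to exercise this lowering from C++: Clang's checked-add
builtin compiles directly to the @llvm.sadd.with.overflow.i32 intrinsic used
in the tests above. Build for riscv64 with the experimental RV64LegalI32
mode enabled; the exact flag spelling is in the RUN lines of xaluo.ll, which
this excerpt omits. A minimal driver (my own example, not part of the patch):

// __builtin_sadd_overflow returns true on signed overflow and stores the
// wrapped sum; Clang emits llvm.sadd.with.overflow.i32 for the i32 case.
bool addOverflows(int A, int B, int &Sum) {
  return __builtin_sadd_overflow(A, B, &Sum);
}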