[llvm] f090924 - [RISCV] Custom legalize i32 SADDO/SSUBO with RV64LegalI32.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Feb 3 13:15:05 PST 2024


Author: Craig Topper
Date: 2024-02-03T13:07:08-08:00
New Revision: f09092434423be14f32781d8ae263dc041d24551

URL: https://github.com/llvm/llvm-project/commit/f09092434423be14f32781d8ae263dc041d24551
DIFF: https://github.com/llvm/llvm-project/commit/f09092434423be14f32781d8ae263dc041d24551.diff

LOG: [RISCV] Custom legalize i32 SADDO/SSUBO with RV64LegalI32.

The default legalization uses 2 compares and an xor. We can instead
use add+addw+xor+snez like we do without RV64LegalI32.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 212ea7345e05d..fead2aebb5830 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -278,10 +278,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::LOAD, MVT::i32, Custom);
       setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                          MVT::i32, Custom);
-      setOperationAction(ISD::SADDO, MVT::i32, Custom);
       setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                          MVT::i32, Custom);
-    }
+    } else
+      setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
   } else {
     setLibcallName(
         {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
@@ -5354,6 +5355,26 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   return Op;
 }
 
+// Custom lower i32 SADDO/SSUBO with RV64LegalI32 so we take advantage of addw.
+static SDValue lowerSADDO_SSUBO(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
+         "Unexpected custom legalisation");
+  if (isa<ConstantSDNode>(Op.getOperand(1)))
+    return SDValue();
+
+  bool IsAdd = Op.getOpcode() == ISD::SADDO;
+  SDLoc DL(Op);
+  SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(0));
+  SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op.getOperand(1));
+  SDValue WideOp = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
+  SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, WideOp);
+  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, WideOp,
+                             DAG.getValueType(MVT::i32));
+  SDValue Ovf = DAG.getSetCC(DL, Op.getValue(1).getValueType(), WideOp, SExt,
+                             ISD::SETNE);
+  return DAG.getMergeValues({Res, Ovf}, DL);
+}
+
 // Custom lower i32 SMULO with RV64LegalI32 so we take advantage of mulw.
 static SDValue lowerSMULO(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getValueType() == MVT::i32 && RV64LegalI32 &&
@@ -5873,6 +5894,9 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerFRAMEADDR(Op, DAG);
   case ISD::RETURNADDR:
     return lowerRETURNADDR(Op, DAG);
+  case ISD::SADDO:
+  case ISD::SSUBO:
+    return lowerSADDO_SSUBO(Op, DAG);
   case ISD::SMULO:
     return lowerSMULO(Op, DAG);
   case ISD::SHL_PARTS:

diff  --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
index aa3d9bf4134bc..a58fd6f785411 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/xaluo.ll
@@ -9,10 +9,10 @@ define zeroext i1 @saddo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
 ; RV64-LABEL: saddo1.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a3, a0, a1
-; RV64-NEXT:    slt a0, a3, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    sw a3, 0(a2)
+; RV64-NEXT:    add a1, a0, a1
+; RV64-NEXT:    xor a3, a1, a3
+; RV64-NEXT:    snez a0, a3
+; RV64-NEXT:    sw a1, 0(a2)
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -199,10 +199,10 @@ entry:
 define zeroext i1 @ssubo1.i32(i32 signext %v1, i32 signext %v2, ptr %res) {
 ; RV64-LABEL: ssubo1.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a3, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    xor a0, a3, a0
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    sub a1, a0, a1
+; RV64-NEXT:    xor a3, a1, a3
+; RV64-NEXT:    snez a0, a3
 ; RV64-NEXT:    sw a1, 0(a2)
 ; RV64-NEXT:    ret
 entry:
@@ -479,8 +479,7 @@ define i32 @saddo.select.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.select.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a2, a2, a0
-; RV64-NEXT:    slti a3, a1, 0
+; RV64-NEXT:    add a3, a0, a1
 ; RV64-NEXT:    bne a3, a2, .LBB28_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
@@ -497,9 +496,9 @@ define i1 @saddo.not.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.not.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a0, a2, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    xor a0, a1, a0
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    xor a0, a0, a2
+; RV64-NEXT:    snez a0, a0
 ; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
@@ -606,10 +605,9 @@ entry:
 define i32 @ssubo.select.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a3, a0, a1
-; RV64-NEXT:    slt a3, a3, a0
-; RV64-NEXT:    bne a2, a3, .LBB36_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    bne a3, a2, .LBB36_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB36_2: # %entry
@@ -624,10 +622,10 @@ entry:
 define i1 @ssubo.not.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    xor a0, a2, a0
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    xor a0, a0, a2
+; RV64-NEXT:    snez a0, a0
 ; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
@@ -873,9 +871,8 @@ define zeroext i1 @saddo.br.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: saddo.br.i32:
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    addw a2, a0, a1
-; RV64-NEXT:    slt a0, a2, a0
-; RV64-NEXT:    slti a1, a1, 0
-; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    beq a0, a2, .LBB52_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret
@@ -973,10 +970,9 @@ continue:
 define zeroext i1 @ssubo.br.i32(i32 signext %v1, i32 signext %v2) {
 ; RV64-LABEL: ssubo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    sgtz a2, a1
-; RV64-NEXT:    subw a1, a0, a1
-; RV64-NEXT:    slt a0, a1, a0
-; RV64-NEXT:    beq a2, a0, .LBB56_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    beq a0, a2, .LBB56_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret


        


More information about the llvm-commits mailing list