[llvm] 6b429a9 - [RISCV] Improve RV64 codegen for i32 ISD::SADDO when RHS is constant.
    Craig Topper via llvm-commits 
    llvm-commits at lists.llvm.org
       
    Mon May  8 13:05:42 PDT 2023
    
    
  
Author: Craig Topper
Date: 2023-05-08T13:02:19-07:00
New Revision: 6b429a9cf93342a280d67f63104285888c9884d1
URL: https://github.com/llvm/llvm-project/commit/6b429a9cf93342a280d67f63104285888c9884d1
DIFF: https://github.com/llvm/llvm-project/commit/6b429a9cf93342a280d67f63104285888c9884d1.diff
LOG: [RISCV] Improve RV64 codegen for i32 ISD::SADDO when RHS is constant.
This uses the same sequence we get from LegalizeDAG for i32 on RV32, but modified
to use W instructions.
When the RHS is constant one of the setccs simplifies to a constant and the xor will either
be an xori with 1 or get removed.
When the RHS is not a constant it was not an obvious improvement and it was a regression
when used with a branch. So I've restricted to the constant case.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D150135
Added: 
    
Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/xaluo.ll
Removed: 
    
################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 14a9ca677374..66d3fcf27c51 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -252,6 +252,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
                        MVT::i32, Custom);
 
+    setOperationAction(ISD::SADDO, MVT::i32, Custom);
     setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
                        MVT::i32, Custom);
   } else {
@@ -9033,6 +9034,39 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
     break;
   }
+  case ISD::SADDO: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+
+    // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
+    // use the default legalization.
+    if (!isa<ConstantSDNode>(N->getOperand(1)))
+      return;
+
+    SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
+    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
+                      DAG.getValueType(MVT::i32));
+
+    SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+    // For an addition, the result should be less than one of the operands (LHS)
+    // if and only if the other operand (RHS) is negative, otherwise there will
+    // be overflow.
+    // For a subtraction, the result should be less than one of the operands
+    // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+    // otherwise there will be overflow.
+    EVT OType = N->getValueType(1);
+    SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
+    SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
+
+    SDValue Overflow =
+        DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+    Results.push_back(Overflow);
+    return;
+  }
   case ISD::UADDO:
   case ISD::USUBO: {
     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
diff  --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index e0da75877e31..f6efe23649ec 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -62,10 +62,8 @@ define zeroext i1 @saddo2.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64-LABEL: saddo2.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addi a2, a0, 4
-; RV64-NEXT:    addiw a0, a0, 4
-; RV64-NEXT:    xor a0, a0, a2
-; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
 ; RV64-NEXT:    sw a2, 0(a1)
 ; RV64-NEXT:    ret
 ;
@@ -78,10 +76,8 @@ define zeroext i1 @saddo2.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64ZBA-LABEL: saddo2.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, a0, 4
-; RV64ZBA-NEXT:    addiw a0, a0, 4
-; RV64ZBA-NEXT:    xor a0, a0, a2
-; RV64ZBA-NEXT:    snez a0, a0
+; RV64ZBA-NEXT:    addiw a2, a0, 4
+; RV64ZBA-NEXT:    slt a0, a2, a0
 ; RV64ZBA-NEXT:    sw a2, 0(a1)
 ; RV64ZBA-NEXT:    ret
 entry:
@@ -104,10 +100,9 @@ define zeroext i1 @saddo3.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64-LABEL: saddo3.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addi a2, a0, -4
-; RV64-NEXT:    addiw a0, a0, -4
-; RV64-NEXT:    xor a0, a0, a2
-; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    addiw a2, a0, -4
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    sw a2, 0(a1)
 ; RV64-NEXT:    ret
 ;
@@ -121,10 +116,9 @@ define zeroext i1 @saddo3.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64ZBA-LABEL: saddo3.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, a0, -4
-; RV64ZBA-NEXT:    addiw a0, a0, -4
-; RV64ZBA-NEXT:    xor a0, a0, a2
-; RV64ZBA-NEXT:    snez a0, a0
+; RV64ZBA-NEXT:    addiw a2, a0, -4
+; RV64ZBA-NEXT:    slt a0, a2, a0
+; RV64ZBA-NEXT:    xori a0, a0, 1
 ; RV64ZBA-NEXT:    sw a2, 0(a1)
 ; RV64ZBA-NEXT:    ret
 entry:
@@ -150,11 +144,9 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
 ; RV64:       # %bb.0: # %entry
 ; RV64-NEXT:    lui a2, 4096
 ; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    add a3, a0, a2
-; RV64-NEXT:    addw a0, a0, a2
-; RV64-NEXT:    xor a0, a0, a3
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a3, 0(a1)
+; RV64-NEXT:    addw a2, a0, a2
+; RV64-NEXT:    slt a0, a2, a0
+; RV64-NEXT:    sw a2, 0(a1)
 ; RV64-NEXT:    ret
 ;
 ; RV32ZBA-LABEL: saddo4.i32:
@@ -170,11 +162,9 @@ define zeroext i1 @saddo4.i32(i32 signext %v1, ptr %res) {
 ; RV64ZBA:       # %bb.0: # %entry
 ; RV64ZBA-NEXT:    lui a2, 4096
 ; RV64ZBA-NEXT:    addiw a2, a2, -1
-; RV64ZBA-NEXT:    add a3, a0, a2
-; RV64ZBA-NEXT:    addw a0, a0, a2
-; RV64ZBA-NEXT:    xor a0, a0, a3
-; RV64ZBA-NEXT:    snez a0, a0
-; RV64ZBA-NEXT:    sw a3, 0(a1)
+; RV64ZBA-NEXT:    addw a2, a0, a2
+; RV64ZBA-NEXT:    slt a0, a2, a0
+; RV64ZBA-NEXT:    sw a2, 0(a1)
 ; RV64ZBA-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 16777215)
@@ -594,10 +584,8 @@ define zeroext i1 @ssubo2.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64-LABEL: ssubo2.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    addi a2, a0, 4
-; RV64-NEXT:    addiw a0, a0, 4
-; RV64-NEXT:    xor a0, a0, a2
-; RV64-NEXT:    snez a0, a0
+; RV64-NEXT:    addiw a2, a0, 4
+; RV64-NEXT:    slt a0, a2, a0
 ; RV64-NEXT:    sw a2, 0(a1)
 ; RV64-NEXT:    ret
 ;
@@ -610,10 +598,8 @@ define zeroext i1 @ssubo2.i32(i32 signext %v1, ptr %res) {
 ;
 ; RV64ZBA-LABEL: ssubo2.i32:
 ; RV64ZBA:       # %bb.0: # %entry
-; RV64ZBA-NEXT:    addi a2, a0, 4
-; RV64ZBA-NEXT:    addiw a0, a0, 4
-; RV64ZBA-NEXT:    xor a0, a0, a2
-; RV64ZBA-NEXT:    snez a0, a0
+; RV64ZBA-NEXT:    addiw a2, a0, 4
+; RV64ZBA-NEXT:    slt a0, a2, a0
 ; RV64ZBA-NEXT:    sw a2, 0(a1)
 ; RV64ZBA-NEXT:    ret
 entry:
        
    
    
More information about the llvm-commits
mailing list