[llvm] 3dc5b53 - [RISCV] Improve legalization of i32 UADDO/USUBO on RV64.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 15 09:30:30 PDT 2021


Author: Craig Topper
Date: 2021-03-15T09:30:23-07:00
New Revision: 3dc5b533e093ee5df92b3c11ee2150869e83b8a6

URL: https://github.com/llvm/llvm-project/commit/3dc5b533e093ee5df92b3c11ee2150869e83b8a6
DIFF: https://github.com/llvm/llvm-project/commit/3dc5b533e093ee5df92b3c11ee2150869e83b8a6.diff

LOG: [RISCV] Improve legalization of i32 UADDO/USUBO on RV64.

The default legalization uses zero extends that require a pair of shifts on
RISC-V. Instead, we can take advantage of the fact that unsigned compares
work equally well on sign-extended inputs. This allows us to use addw/subw
and sext.w.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D98233
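
For illustration, the following is a minimal standalone C++ sketch (not part of
this commit; the helper names are invented) of the property the legalization
relies on: sign extension preserves the unsigned ordering of any two 32-bit
values, so an unsigned compare of the addw/subw result against the
sign-extended LHS computes the same overflow bit as the 32-bit unsigned
compare that defines UADDO/USUBO.

  #include <cassert>
  #include <cstdint>

  // Models the value an RV64 register holds after sext.w / addw / subw.
  static int64_t sext32(uint32_t X) { return (int64_t)(int32_t)X; }

  // i32 UADDO: overflow iff (uint32_t)(A + B) < A. The legalized sequence
  // instead compares the sign-extended sum against the sign-extended LHS.
  static bool uaddOverflow(uint32_t A, uint32_t B) {
    uint64_t Res = (uint64_t)sext32(A + B); // addw a3, a0, a1
    uint64_t LHS = (uint64_t)sext32(A);     // sext.w a4, a0
    return Res < LHS;                       // sltu a3, a3, a4
  }

  // i32 USUBO: overflow iff A < B (a borrow out of bit 31).
  static bool usubOverflow(uint32_t A, uint32_t B) {
    uint64_t Res = (uint64_t)sext32(A - B); // subw a3, a0, a1
    uint64_t LHS = (uint64_t)sext32(A);     // sext.w a4, a0
    return Res > LHS;                       // sltu a3, a4, a3
  }

  int main() {
    // Spot-check against the reference definitions of unsigned overflow.
    uint32_t Samples[] = {0, 1, 0x7fffffffu, 0x80000000u,
                          0xfffffffeu, 0xffffffffu};
    for (uint32_t A : Samples)
      for (uint32_t B : Samples) {
        assert(uaddOverflow(A, B) == ((uint32_t)(A + B) < A));
        assert(usubOverflow(A, B) == (A < B));
      }
    return 0;
  }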

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/xaluo.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 069e90c0b6cb..e93e61549c0f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -203,6 +203,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
+
+    setOperationAction(ISD::UADDO, MVT::i32, Custom);
+    setOperationAction(ISD::USUBO, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -3468,6 +3471,31 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
     break;
   }
+  case ISD::UADDO:
+  case ISD::USUBO: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    bool IsAdd = N->getOpcode() == ISD::UADDO;
+    SDLoc DL(N);
+    // Create an ADDW or SUBW.
+    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+    SDValue Res =
+        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
+    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
+                      DAG.getValueType(MVT::i32));
+
+    // Sign extend the LHS and perform an unsigned compare with the ADDW result.
+    // Since the inputs are sign extended from i32, this is equivalent to
+    // comparing the lower 32 bits.
+    LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
+                                    IsAdd ? ISD::SETULT : ISD::SETUGT);
+
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+    Results.push_back(Overflow);
+    return;
+  }
   case ISD::BITCAST: {
     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
              Subtarget.hasStdExtF()) ||

diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 04824074faae..43b27d455015 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -215,16 +215,12 @@ define zeroext i1 @uaddo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: uaddo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    addw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a3, a4
+; RV64-NEXT:    add a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -358,16 +354,12 @@ define zeroext i1 @usubo.i32(i32 %v1, i32 %v2, i32* %res) {
 ;
 ; RV64-LABEL: usubo.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a1, a0, a1
-; RV64-NEXT:    slli a0, a1, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    xor a0, a0, a1
-; RV64-NEXT:    snez a0, a0
-; RV64-NEXT:    sw a1, 0(a2)
+; RV64-NEXT:    subw a3, a0, a1
+; RV64-NEXT:    sext.w a4, a0
+; RV64-NEXT:    sltu a3, a4, a3
+; RV64-NEXT:    sub a0, a0, a1
+; RV64-NEXT:    sw a0, 0(a2)
+; RV64-NEXT:    mv a0, a3
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -821,14 +813,9 @@ define i32 @uaddo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB26_2
+; RV64-NEXT:    addw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a2, a3, .LBB26_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB26_2: # %entry
@@ -850,15 +837,10 @@ define i1 @uaddo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a1, a0
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1058,14 +1040,9 @@ define i32 @usubo.select.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.select.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    srli a2, a2, 32
-; RV64-NEXT:    slli a3, a0, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    sub a2, a3, a2
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    srli a3, a3, 32
-; RV64-NEXT:    bne a3, a2, .LBB34_2
+; RV64-NEXT:    subw a2, a0, a1
+; RV64-NEXT:    sext.w a3, a0
+; RV64-NEXT:    bltu a3, a2, .LBB34_2
 ; RV64-NEXT:  # %bb.1: # %entry
 ; RV64-NEXT:    mv a0, a1
 ; RV64-NEXT:  .LBB34_2: # %entry
@@ -1087,15 +1064,10 @@ define i1 @usubo.not.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.not.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    xor a0, a1, a0
-; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    sltu a0, a0, a1
+; RV64-NEXT:    xori a0, a0, 1
 ; RV64-NEXT:    ret
 entry:
   %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -1545,14 +1517,9 @@ define zeroext i1 @uaddo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: uaddo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB48_2
+; RV64-NEXT:    addw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a1, a0, .LBB48_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
@@ -1712,14 +1679,9 @@ define zeroext i1 @usubo.br.i32(i32 %v1, i32 %v2) {
 ;
 ; RV64-LABEL: usubo.br.i32:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    srli a0, a0, 32
-; RV64-NEXT:    sub a0, a0, a1
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    beq a1, a0, .LBB52_2
+; RV64-NEXT:    subw a1, a0, a1
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    bgeu a0, a1, .LBB52_2
 ; RV64-NEXT:  # %bb.1: # %overflow
 ; RV64-NEXT:    mv a0, zero
 ; RV64-NEXT:    ret
