[llvm] a33ce06 - [RISCV] Improve i32 UADDSAT/USUBSAT on RV64.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 16 07:47:31 PDT 2021


Author: Craig Topper
Date: 2021-03-16T07:44:06-07:00
New Revision: a33ce06cf59a31c96a484a11b526392d9f8c9548

URL: https://github.com/llvm/llvm-project/commit/a33ce06cf59a31c96a484a11b526392d9f8c9548
DIFF: https://github.com/llvm/llvm-project/commit/a33ce06cf59a31c96a484a11b526392d9f8c9548.diff

LOG: [RISCV] Improve i32 UADDSAT/USUBSAT on RV64.

The default promotion uses zero extends that become shifts. We
can use sign extend instead, which is better for RISCV.

I've used two different implementations based on whether we
have minu/maxu instructions.

Differential Revision: https://reviews.llvm.org/D98683

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/uadd_sat.ll
    llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
    llvm/test/CodeGen/RISCV/usub_sat.ll
    llvm/test/CodeGen/RISCV/usub_sat_plus.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9e45307f42c0..ade1bc20cad7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -207,6 +207,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::UADDO, MVT::i32, Custom);
     setOperationAction(ISD::USUBO, MVT::i32, Custom);
+    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
+    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
   }
 
   if (!Subtarget.hasStdExtM()) {
@@ -3521,6 +3523,29 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Overflow);
     return;
   }
+  case ISD::UADDSAT:
+  case ISD::USUBSAT: {
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           "Unexpected custom legalisation");
+    SDLoc DL(N);
+    if (Subtarget.hasStdExtZbb()) {
+      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
+      // sign extend allows overflow of the lower 32 bits to be detected on
+      // the promoted size.
+      SDValue LHS =
+          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+      SDValue RHS =
+          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
+      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
+
+    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
+    // promotion for UADDO/USUBO.
+    Results.push_back(expandAddSubSat(N, DAG));
+    return;
+  }
   case ISD::BITCAST: {
     assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
              Subtarget.hasStdExtF()) ||

diff  --git a/llvm/test/CodeGen/RISCV/uadd_sat.ll b/llvm/test/CodeGen/RISCV/uadd_sat.ll
index bac2a1915344..8f817b3e4972 100644
--- a/llvm/test/CodeGen/RISCV/uadd_sat.ll
+++ b/llvm/test/CodeGen/RISCV/uadd_sat.ll
@@ -24,19 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 ;
 ; RV64I-LABEL: func:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -1
-; RV64I-NEXT:    bltu a0, a1, .LBB0_2
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    addw a1, a0, a1
+; RV64I-NEXT:    addi a0, zero, -1
+; RV64I-NEXT:    bltu a1, a2, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
-; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32IZbb-LABEL: func:
@@ -48,16 +42,9 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 ;
 ; RV64IZbb-LABEL: func:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    add a0, a0, a1
-; RV64IZbb-NEXT:    addi a1, zero, 1
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    addi a1, a1, -1
-; RV64IZbb-NEXT:    minu a0, a0, a1
-; RV64IZbb-NEXT:    sext.w a0, a0
+; RV64IZbb-NEXT:    not a2, a1
+; RV64IZbb-NEXT:    minu a0, a0, a2
+; RV64IZbb-NEXT:    addw a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y);
   ret i32 %tmp;

diff  --git a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
index 20bb7a24d754..589374493d71 100644
--- a/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/uadd_sat_plus.ll
@@ -25,16 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 ;
 ; RV64I-LABEL: func32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
 ; RV64I-NEXT:    mul a1, a1, a2
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    addi a1, zero, 1
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    addi a1, a1, -1
-; RV64I-NEXT:    bltu a0, a1, .LBB0_2
+; RV64I-NEXT:    addw a1, a0, a1
+; RV64I-NEXT:    sext.w a2, a0
+; RV64I-NEXT:    addi a0, zero, -1
+; RV64I-NEXT:    bltu a1, a2, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
@@ -50,16 +45,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 ;
 ; RV64IZbb-LABEL: func32:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    mul a1, a1, a2
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
+; RV64IZbb-NEXT:    mulw a1, a1, a2
+; RV64IZbb-NEXT:    not a2, a1
+; RV64IZbb-NEXT:    sext.w a0, a0
+; RV64IZbb-NEXT:    minu a0, a0, a2
 ; RV64IZbb-NEXT:    add a0, a0, a1
-; RV64IZbb-NEXT:    addi a1, zero, 1
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    addi a1, a1, -1
-; RV64IZbb-NEXT:    minu a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %a = mul i32 %y, %z
   %tmp = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %a)

diff  --git a/llvm/test/CodeGen/RISCV/usub_sat.ll b/llvm/test/CodeGen/RISCV/usub_sat.ll
index 52657effaa97..353e8eaf838e 100644
--- a/llvm/test/CodeGen/RISCV/usub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat.ll
@@ -24,17 +24,13 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 ;
 ; RV64I-LABEL: func:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    srli a1, a1, 32
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a2, a0, 32
-; RV64I-NEXT:    sub a0, a2, a1
-; RV64I-NEXT:    mv a1, zero
-; RV64I-NEXT:    bltu a2, a0, .LBB0_2
+; RV64I-NEXT:    mv a2, a0
+; RV64I-NEXT:    subw a1, a0, a1
+; RV64I-NEXT:    mv a0, zero
+; RV64I-NEXT:    bltu a2, a1, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
-; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
-; RV64I-NEXT:    sext.w a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32IZbb-LABEL: func:
@@ -45,11 +41,7 @@ define signext i32 @func(i32 signext %x, i32 signext %y) nounwind {
 ;
 ; RV64IZbb-LABEL: func:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a2, a1, 32
-; RV64IZbb-NEXT:    srli a2, a2, 32
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    maxu a0, a0, a2
+; RV64IZbb-NEXT:    maxu a0, a0, a1
 ; RV64IZbb-NEXT:    subw a0, a0, a1
 ; RV64IZbb-NEXT:    ret
   %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %y);

diff  --git a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
index 590df5e65a72..beeaf54e4238 100644
--- a/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/usub_sat_plus.ll
@@ -25,14 +25,11 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 ;
 ; RV64I-LABEL: func32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a3, a0, 32
-; RV64I-NEXT:    mul a0, a1, a2
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    sub a1, a3, a0
+; RV64I-NEXT:    mul a1, a1, a2
+; RV64I-NEXT:    subw a1, a0, a1
+; RV64I-NEXT:    sext.w a2, a0
 ; RV64I-NEXT:    mv a0, zero
-; RV64I-NEXT:    bltu a3, a1, .LBB0_2
+; RV64I-NEXT:    bltu a2, a1, .LBB0_2
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:  .LBB0_2:
@@ -47,11 +44,8 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
 ;
 ; RV64IZbb-LABEL: func32:
 ; RV64IZbb:       # %bb.0:
-; RV64IZbb-NEXT:    slli a0, a0, 32
-; RV64IZbb-NEXT:    srli a0, a0, 32
-; RV64IZbb-NEXT:    mul a1, a1, a2
-; RV64IZbb-NEXT:    slli a1, a1, 32
-; RV64IZbb-NEXT:    srli a1, a1, 32
+; RV64IZbb-NEXT:    mulw a1, a1, a2
+; RV64IZbb-NEXT:    sext.w a0, a0
 ; RV64IZbb-NEXT:    maxu a0, a0, a1
 ; RV64IZbb-NEXT:    sub a0, a0, a1
 ; RV64IZbb-NEXT:    ret


        


More information about the llvm-commits mailing list