[llvm] 1bf4bbc - [LegalizeTypes][RISCV][WebAssembly] Expand ABS in PromoteIntRes_ABS if it will expand to sra+xor+sub later.

Tue Mar 15 08:31:42 PDT 2022

Author: Craig Topper
Date: 2022-03-15T08:27:39-07:00
New Revision: 1bf4bbc4928338db2fa6dca8ff28160fc1838ac2

URL: https://github.com/llvm/llvm-project/commit/1bf4bbc4928338db2fa6dca8ff28160fc1838ac2
DIFF: https://github.com/llvm/llvm-project/commit/1bf4bbc4928338db2fa6dca8ff28160fc1838ac2.diff

LOG: [LegalizeTypes][RISCV][WebAssembly] Expand ABS in PromoteIntRes_ABS if it will expand to sra+xor+sub later.

If we promote the ABS and then Expand in LegalizeDAG, then both the
sra and the xor will have their inputs sign extended. This generates
extra code on RISCV which lacks an i8 or i16 sign extend instruction.
If we expand during type legalization, then only the sra will get its
input sign extended. RISCV is able to combine this with the sra by
doing a shift left followed by an sra.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D121664

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/test/CodeGen/RISCV/iabs.ll
    llvm/test/CodeGen/WebAssembly/PR41149.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 04eeccb7f2d12..c1e42ef42ccfd 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1432,6 +1432,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO_CARRY(SDNode *N,
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+
+  // If a larger ABS or SMAX isn't supported by the target, try to expand now.
+  // If we expand later we'll end up sign extending more than just the sra input
+  // in sra+xor+sub expansion.
+  if (!OVT.isVector() &&
+      !TLI.isOperationLegalOrCustomOrPromote(ISD::ABS, NVT) &&
+      !TLI.isOperationLegal(ISD::SMAX, NVT)) {
+    if (SDValue Res = TLI.expandABS(N, DAG))
+      return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Res);
+  }
+
   SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
   return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
 }

diff  --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll
index 4b46c5b865377..749887d9e0e0f 100644
--- a/llvm/test/CodeGen/RISCV/iabs.ll
+++ b/llvm/test/CodeGen/RISCV/iabs.ll
@@ -18,16 +18,13 @@ declare i32 @llvm.abs.i32(i32, i1 immarg)
 declare i64 @llvm.abs.i64(i64, i1 immarg)
 declare i128 @llvm.abs.i128(i128, i1 immarg)
 
-; FIXME: Sign extending the input to the input to the xor isn't needed and
-; causes an extra srai.
 define i8 @abs8(i8 %x) {
 ; RV32I-LABEL: abs8:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    srai a1, a0, 24
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    xor a1, a1, a0
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abs8:
@@ -39,20 +36,18 @@ define i8 @abs8(i8 %x) {
 ;
 ; RV32ZBT-LABEL: abs8:
 ; RV32ZBT:       # %bb.0:
-; RV32ZBT-NEXT:    slli a0, a0, 24
-; RV32ZBT-NEXT:    srai a1, a0, 24
-; RV32ZBT-NEXT:    srai a0, a0, 31
-; RV32ZBT-NEXT:    xor a1, a1, a0
-; RV32ZBT-NEXT:    sub a0, a1, a0
+; RV32ZBT-NEXT:    slli a1, a0, 24
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    xor a0, a0, a1
+; RV32ZBT-NEXT:    sub a0, a0, a1
 ; RV32ZBT-NEXT:    ret
 ;
 ; RV64I-LABEL: abs8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    srai a1, a0, 56
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    xor a1, a1, a0
-; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abs8:
@@ -64,26 +59,22 @@ define i8 @abs8(i8 %x) {
 ;
 ; RV64ZBT-LABEL: abs8:
 ; RV64ZBT:       # %bb.0:
-; RV64ZBT-NEXT:    slli a0, a0, 56
-; RV64ZBT-NEXT:    srai a1, a0, 56
-; RV64ZBT-NEXT:    srai a0, a0, 63
-; RV64ZBT-NEXT:    xor a1, a1, a0
-; RV64ZBT-NEXT:    sub a0, a1, a0
+; RV64ZBT-NEXT:    slli a1, a0, 56
+; RV64ZBT-NEXT:    srai a1, a1, 63
+; RV64ZBT-NEXT:    xor a0, a0, a1
+; RV64ZBT-NEXT:    sub a0, a0, a1
 ; RV64ZBT-NEXT:    ret
   %abs = tail call i8 @llvm.abs.i8(i8 %x, i1 true)
   ret i8 %abs
 }
 
-; FIXME: Sign extending the input to the input to the xor isn't needed and
-; causes an extra srai.
 define i8 @select_abs8(i8 %x) {
 ; RV32I-LABEL: select_abs8:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    srai a1, a0, 24
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    xor a1, a1, a0
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 24
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: select_abs8:
@@ -95,20 +86,18 @@ define i8 @select_abs8(i8 %x) {
 ;
 ; RV32ZBT-LABEL: select_abs8:
 ; RV32ZBT:       # %bb.0:
-; RV32ZBT-NEXT:    slli a0, a0, 24
-; RV32ZBT-NEXT:    srai a1, a0, 24
-; RV32ZBT-NEXT:    srai a0, a0, 31
-; RV32ZBT-NEXT:    xor a1, a1, a0
-; RV32ZBT-NEXT:    sub a0, a1, a0
+; RV32ZBT-NEXT:    slli a1, a0, 24
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    xor a0, a0, a1
+; RV32ZBT-NEXT:    sub a0, a0, a1
 ; RV32ZBT-NEXT:    ret
 ;
 ; RV64I-LABEL: select_abs8:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    srai a1, a0, 56
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    xor a1, a1, a0
-; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 56
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: select_abs8:
@@ -120,11 +109,10 @@ define i8 @select_abs8(i8 %x) {
 ;
 ; RV64ZBT-LABEL: select_abs8:
 ; RV64ZBT:       # %bb.0:
-; RV64ZBT-NEXT:    slli a0, a0, 56
-; RV64ZBT-NEXT:    srai a1, a0, 56
-; RV64ZBT-NEXT:    srai a0, a0, 63
-; RV64ZBT-NEXT:    xor a1, a1, a0
-; RV64ZBT-NEXT:    sub a0, a1, a0
+; RV64ZBT-NEXT:    slli a1, a0, 56
+; RV64ZBT-NEXT:    srai a1, a1, 63
+; RV64ZBT-NEXT:    xor a0, a0, a1
+; RV64ZBT-NEXT:    sub a0, a0, a1
 ; RV64ZBT-NEXT:    ret
   %1 = icmp slt i8 %x, 0
   %2 = sub nsw i8 0, %x
@@ -132,16 +120,13 @@ define i8 @select_abs8(i8 %x) {
   ret i8 %3
 }
 
-; FIXME: Sign extending the input to the input to the xor isn't needed and
-; causes an extra srai.
 define i16 @abs16(i16 %x) {
 ; RV32I-LABEL: abs16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srai a1, a0, 16
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    xor a1, a1, a0
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abs16:
@@ -153,20 +138,18 @@ define i16 @abs16(i16 %x) {
 ;
 ; RV32ZBT-LABEL: abs16:
 ; RV32ZBT:       # %bb.0:
-; RV32ZBT-NEXT:    slli a0, a0, 16
-; RV32ZBT-NEXT:    srai a1, a0, 16
-; RV32ZBT-NEXT:    srai a0, a0, 31
-; RV32ZBT-NEXT:    xor a1, a1, a0
-; RV32ZBT-NEXT:    sub a0, a1, a0
+; RV32ZBT-NEXT:    slli a1, a0, 16
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    xor a0, a0, a1
+; RV32ZBT-NEXT:    sub a0, a0, a1
 ; RV32ZBT-NEXT:    ret
 ;
 ; RV64I-LABEL: abs16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srai a1, a0, 48
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    xor a1, a1, a0
-; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abs16:
@@ -178,26 +161,22 @@ define i16 @abs16(i16 %x) {
 ;
 ; RV64ZBT-LABEL: abs16:
 ; RV64ZBT:       # %bb.0:
-; RV64ZBT-NEXT:    slli a0, a0, 48
-; RV64ZBT-NEXT:    srai a1, a0, 48
-; RV64ZBT-NEXT:    srai a0, a0, 63
-; RV64ZBT-NEXT:    xor a1, a1, a0
-; RV64ZBT-NEXT:    sub a0, a1, a0
+; RV64ZBT-NEXT:    slli a1, a0, 48
+; RV64ZBT-NEXT:    srai a1, a1, 63
+; RV64ZBT-NEXT:    xor a0, a0, a1
+; RV64ZBT-NEXT:    sub a0, a0, a1
 ; RV64ZBT-NEXT:    ret
   %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
   ret i16 %abs
 }
 
-; FIXME: Sign extending the input to the input to the xor isn't needed and
-; causes an extra srai.
 define i16 @select_abs16(i16 %x) {
 ; RV32I-LABEL: select_abs16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srai a1, a0, 16
-; RV32I-NEXT:    srai a0, a0, 31
-; RV32I-NEXT:    xor a1, a1, a0
-; RV32I-NEXT:    sub a0, a1, a0
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    srai a1, a1, 31
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: select_abs16:
@@ -209,20 +188,18 @@ define i16 @select_abs16(i16 %x) {
 ;
 ; RV32ZBT-LABEL: select_abs16:
 ; RV32ZBT:       # %bb.0:
-; RV32ZBT-NEXT:    slli a0, a0, 16
-; RV32ZBT-NEXT:    srai a1, a0, 16
-; RV32ZBT-NEXT:    srai a0, a0, 31
-; RV32ZBT-NEXT:    xor a1, a1, a0
-; RV32ZBT-NEXT:    sub a0, a1, a0
+; RV32ZBT-NEXT:    slli a1, a0, 16
+; RV32ZBT-NEXT:    srai a1, a1, 31
+; RV32ZBT-NEXT:    xor a0, a0, a1
+; RV32ZBT-NEXT:    sub a0, a0, a1
 ; RV32ZBT-NEXT:    ret
 ;
 ; RV64I-LABEL: select_abs16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srai a1, a0, 48
-; RV64I-NEXT:    srai a0, a0, 63
-; RV64I-NEXT:    xor a1, a1, a0
-; RV64I-NEXT:    sub a0, a1, a0
+; RV64I-NEXT:    slli a1, a0, 48
+; RV64I-NEXT:    srai a1, a1, 63
+; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: select_abs16:
@@ -234,11 +211,10 @@ define i16 @select_abs16(i16 %x) {
 ;
 ; RV64ZBT-LABEL: select_abs16:
 ; RV64ZBT:       # %bb.0:
-; RV64ZBT-NEXT:    slli a0, a0, 48
-; RV64ZBT-NEXT:    srai a1, a0, 48
-; RV64ZBT-NEXT:    srai a0, a0, 63
-; RV64ZBT-NEXT:    xor a1, a1, a0
-; RV64ZBT-NEXT:    sub a0, a1, a0
+; RV64ZBT-NEXT:    slli a1, a0, 48
+; RV64ZBT-NEXT:    srai a1, a1, 63
+; RV64ZBT-NEXT:    xor a0, a0, a1
+; RV64ZBT-NEXT:    sub a0, a0, a1
 ; RV64ZBT-NEXT:    ret
   %1 = icmp slt i16 %x, 0
   %2 = sub nsw i16 0, %x

diff  --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll
index 0913bf0eba220..7ee99e1aff816 100644
--- a/llvm/test/CodeGen/WebAssembly/PR41149.ll
+++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll
@@ -10,7 +10,7 @@ define void @mod() {
 ; CHECK-NEXT: i32.load8_s     0
 ; CHECK-NEXT: local.tee       0
 ; CHECK-NEXT: local.get       0
-; CHECK-NEXT: i32.const       31
+; CHECK-NEXT: i32.const       7
 ; CHECK-NEXT: i32.shr_s
 ; CHECK-NEXT: local.tee       0
 ; CHECK-NEXT: i32.xor