[llvm] 296e8ca - [RISCV] Isel (sra (sext_inreg X, i16), C) -> (srai (slli X, XLen-16), (XLen-16) + C).

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 9 21:24:09 PST 2022


Author: Craig Topper
Date: 2022-01-09T21:23:43-08:00
New Revision: 296e8cae5cdaaf292df00453d15fbd5858d6ea0c

URL: https://github.com/llvm/llvm-project/commit/296e8cae5cdaaf292df00453d15fbd5858d6ea0c
DIFF: https://github.com/llvm/llvm-project/commit/296e8cae5cdaaf292df00453d15fbd5858d6ea0c.diff

LOG: [RISCV] Isel (sra (sext_inreg X, i16), C) -> (srai (slli X, XLen-16), (XLen-16) + C).

Similar for (sra (sext_inreg X, i8), C).

With Zbb, sext_inreg of i8 and i16 is legal thanks to sext.b and sext.h.
This transform makes the Zbb codegen the same as without Zbb. The
slli/srai shifts are more compressible, and the transform also exposes
a CSE opportunity with another slli in the i16 sdiv-by-constant codegen.
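
For example, on RV64 the srai_i16 test updated below previously selected

  sext.h  a0, a0
  srai    a0, a0, 9

with Zbb, and now selects the same code as the non-Zbb configuration:

  slli    a0, a0, 48
  srai    a0, a0, 57

Both slli and srai have 16-bit encodings (c.slli/c.srai) when the C
extension is available (c.srai additionally requires an x8-x15 register),
whereas sext.b and sext.h have no compressed forms.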

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
    llvm/test/CodeGen/RISCV/div-by-constant.ll
    llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
    llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b87a25278922..5afae3265f6c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -572,6 +572,37 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
     break;
   }
+  case ISD::SRA: {
+    // Optimize (sra (sext_inreg X, i16), C) ->
+    //          (srai (slli X, XLen-16), (XLen-16) + C)
+    // And      (sra (sext_inreg X, i8), C) ->
+    //          (srai (slli X, XLen-8), (XLen-8) + C)
+    // This can occur when Zbb is enabled, which makes sext_inreg i16/i8 legal.
+    // This transform matches the code we get without Zbb. The shifts are more
+    // compressible, and this can help expose CSE opportunities in the sdiv by
+    // constant optimization.
+    auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+    if (!N1C)
+      break;
+    SDValue N0 = Node->getOperand(0);
+    if (N0.getOpcode() != ISD::SIGN_EXTEND_INREG || !N0.hasOneUse())
+      break;
+    uint64_t ShAmt = N1C->getZExtValue();
+    unsigned ExtSize =
+        cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+    // ExtSize of 32 should use sraiw via tablegen pattern.
+    if (ExtSize >= 32 || ShAmt >= ExtSize)
+      break;
+    unsigned LShAmt = Subtarget->getXLen() - ExtSize;
+    SDNode *SLLI =
+        CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
+                               CurDAG->getTargetConstant(LShAmt, DL, VT));
+    SDNode *SRAI = CurDAG->getMachineNode(
+        RISCV::SRAI, DL, VT, SDValue(SLLI, 0),
+        CurDAG->getTargetConstant(LShAmt + ShAmt, DL, VT));
+    ReplaceNode(Node, SRAI);
+    return;
+  }
   case ISD::AND: {
     auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
     if (!N1C)
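
As an aside, the new selection is justified by a simple identity: sign
extending the low N bits and then shifting right by C is the same as
shifting left by XLen-N and then arithmetic shifting right by (XLen-N)+C,
which is what the LShAmt computation above implements. A minimal
standalone C++ sketch of that identity for the i8, XLen=64 case (the
helper names are illustrative only, not part of the patch):

  #include <cassert>
  #include <cstdint>

  // (sra (sext_inreg X, i8), C): sign-extend the low 8 bits, then shift
  // right arithmetically by C.
  static int64_t sextInregThenSra(int64_t X, unsigned C) {
    return static_cast<int64_t>(static_cast<int8_t>(X)) >> C;
  }

  // (srai (slli X, 56), 56 + C): shift left by XLen-8, then arithmetic
  // shift right by (XLen-8) + C.
  static int64_t slliThenSrai(int64_t X, unsigned C) {
    return static_cast<int64_t>(static_cast<uint64_t>(X) << 56) >> (56 + C);
  }

  int main() {
    for (int64_t X = -1024; X <= 1024; ++X)
      for (unsigned C = 0; C < 8; ++C)
        assert(sextInregThenSra(X, C) == slliThenSrai(X, C));
    return 0;
  }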

diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll
index 50df79f85eae..2f13b18d0ac3 100644
--- a/llvm/test/CodeGen/RISCV/div-by-constant.ll
+++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll
@@ -645,8 +645,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT:    add a0, a1, a0
 ; RV32IMZB-NEXT:    andi a1, a0, 128
 ; RV32IMZB-NEXT:    srli a1, a1, 7
-; RV32IMZB-NEXT:    sext.b a0, a0
-; RV32IMZB-NEXT:    srai a0, a0, 2
+; RV32IMZB-NEXT:    slli a0, a0, 24
+; RV32IMZB-NEXT:    srai a0, a0, 26
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
 ;
@@ -674,8 +674,8 @@ define i8 @sdiv8_constant_add_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT:    addw a0, a1, a0
 ; RV64IMZB-NEXT:    andi a1, a0, 128
 ; RV64IMZB-NEXT:    srli a1, a1, 7
-; RV64IMZB-NEXT:    sext.b a0, a0
-; RV64IMZB-NEXT:    srai a0, a0, 2
+; RV64IMZB-NEXT:    slli a0, a0, 56
+; RV64IMZB-NEXT:    srai a0, a0, 58
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
   %1 = sdiv i8 %a, 7
@@ -709,8 +709,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV32IMZB-NEXT:    sub a0, a1, a0
 ; RV32IMZB-NEXT:    andi a1, a0, 128
 ; RV32IMZB-NEXT:    srli a1, a1, 7
-; RV32IMZB-NEXT:    sext.b a0, a0
-; RV32IMZB-NEXT:    srai a0, a0, 2
+; RV32IMZB-NEXT:    slli a0, a0, 24
+; RV32IMZB-NEXT:    srai a0, a0, 26
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
 ;
@@ -738,8 +738,8 @@ define i8 @sdiv8_constant_sub_srai(i8 %a) nounwind {
 ; RV64IMZB-NEXT:    subw a0, a1, a0
 ; RV64IMZB-NEXT:    andi a1, a0, 128
 ; RV64IMZB-NEXT:    srli a1, a1, 7
-; RV64IMZB-NEXT:    sext.b a0, a0
-; RV64IMZB-NEXT:    srai a0, a0, 2
+; RV64IMZB-NEXT:    slli a0, a0, 56
+; RV64IMZB-NEXT:    srai a0, a0, 58
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
   %1 = sdiv i8 %a, -7
@@ -846,8 +846,6 @@ define i16 @sdiv16_constant_srai(i16 %a) nounwind {
   ret i16 %1
 }
 
-; FIXME: The Zbb test code has 1 more instruction after the mul because we don't
-; share a slli.
 define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
 ; RV32IM-LABEL: sdiv16_constant_add_srai:
 ; RV32IM:       # %bb.0:
@@ -872,10 +870,9 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
 ; RV32IMZB-NEXT:    mul a1, a1, a2
 ; RV32IMZB-NEXT:    srli a1, a1, 16
 ; RV32IMZB-NEXT:    add a0, a1, a0
-; RV32IMZB-NEXT:    slli a1, a0, 16
-; RV32IMZB-NEXT:    srli a1, a1, 31
-; RV32IMZB-NEXT:    sext.h a0, a0
-; RV32IMZB-NEXT:    srai a0, a0, 3
+; RV32IMZB-NEXT:    slli a0, a0, 16
+; RV32IMZB-NEXT:    srli a1, a0, 31
+; RV32IMZB-NEXT:    srai a0, a0, 19
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
 ;
@@ -902,18 +899,15 @@ define i16 @sdiv16_constant_add_srai(i16 %a) nounwind {
 ; RV64IMZB-NEXT:    mul a1, a1, a2
 ; RV64IMZB-NEXT:    srli a1, a1, 16
 ; RV64IMZB-NEXT:    addw a0, a1, a0
-; RV64IMZB-NEXT:    slli a1, a0, 48
-; RV64IMZB-NEXT:    srli a1, a1, 63
-; RV64IMZB-NEXT:    sext.h a0, a0
-; RV64IMZB-NEXT:    srai a0, a0, 3
+; RV64IMZB-NEXT:    slli a0, a0, 48
+; RV64IMZB-NEXT:    srli a1, a0, 63
+; RV64IMZB-NEXT:    srai a0, a0, 51
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
   %1 = sdiv i16 %a, 15
   ret i16 %1
 }
 
-; FIXME: The Zbb test code has 1 more instruction after the mul because we don't
-; share a slli.
 define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
 ; RV32IM-LABEL: sdiv16_constant_sub_srai:
 ; RV32IM:       # %bb.0:
@@ -938,10 +932,9 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
 ; RV32IMZB-NEXT:    mul a1, a1, a2
 ; RV32IMZB-NEXT:    srli a1, a1, 16
 ; RV32IMZB-NEXT:    sub a0, a1, a0
-; RV32IMZB-NEXT:    slli a1, a0, 16
-; RV32IMZB-NEXT:    srli a1, a1, 31
-; RV32IMZB-NEXT:    sext.h a0, a0
-; RV32IMZB-NEXT:    srai a0, a0, 3
+; RV32IMZB-NEXT:    slli a0, a0, 16
+; RV32IMZB-NEXT:    srli a1, a0, 31
+; RV32IMZB-NEXT:    srai a0, a0, 19
 ; RV32IMZB-NEXT:    add a0, a0, a1
 ; RV32IMZB-NEXT:    ret
 ;
@@ -968,10 +961,9 @@ define i16 @sdiv16_constant_sub_srai(i16 %a) nounwind {
 ; RV64IMZB-NEXT:    mul a1, a1, a2
 ; RV64IMZB-NEXT:    srli a1, a1, 16
 ; RV64IMZB-NEXT:    subw a0, a1, a0
-; RV64IMZB-NEXT:    slli a1, a0, 48
-; RV64IMZB-NEXT:    srli a1, a1, 63
-; RV64IMZB-NEXT:    sext.h a0, a0
-; RV64IMZB-NEXT:    srai a0, a0, 3
+; RV64IMZB-NEXT:    slli a0, a0, 48
+; RV64IMZB-NEXT:    srli a1, a0, 63
+; RV64IMZB-NEXT:    srai a0, a0, 51
 ; RV64IMZB-NEXT:    add a0, a0, a1
 ; RV64IMZB-NEXT:    ret
   %1 = sdiv i16 %a, -15
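
The FIXME comments removed above are now resolved: in the updated RV64IMZB
sequence for sdiv16_constant_add_srai, the single

  slli a0, a0, 48

produced by this transform feeds both the srli that extracts the sign bit
and the srai that performs the final shift, so the separate slli that
previously fed the srli is gone and the Zbb code is no longer one
instruction longer after the mul. The RV32IMZB and sdiv16_constant_sub_srai
outputs improve in the same way.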

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index ed9252f6409f..a4fda68ba021 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -492,6 +492,8 @@ define i8 @srli_i8(i8 %a) nounwind {
   ret i8 %1
 }
 
+; We could use sext.b+srai, but slli+srai offers more opportunities for
+; compressed instructions.
 define i8 @srai_i8(i8 %a) nounwind {
 ; RV32I-LABEL: srai_i8:
 ; RV32I:       # %bb.0:
@@ -501,8 +503,8 @@ define i8 @srai_i8(i8 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: srai_i8:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    sext.b a0, a0
-; RV32ZBB-NEXT:    srai a0, a0, 5
+; RV32ZBB-NEXT:    slli a0, a0, 24
+; RV32ZBB-NEXT:    srai a0, a0, 29
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: srai_i8:
@@ -538,6 +540,8 @@ define i16 @srli_i16(i16 %a) nounwind {
   ret i16 %1
 }
 
+; We could use sext.h+srai, but slli+srai offers more opportunities for
+; compressed instructions.
 define i16 @srai_i16(i16 %a) nounwind {
 ; RV32I-LABEL: srai_i16:
 ; RV32I:       # %bb.0:
@@ -547,8 +551,8 @@ define i16 @srai_i16(i16 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: srai_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    sext.h a0, a0
-; RV32ZBB-NEXT:    srai a0, a0, 9
+; RV32ZBB-NEXT:    slli a0, a0, 16
+; RV32ZBB-NEXT:    srai a0, a0, 25
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: srai_i16:

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index 23efb3bf9ebc..79a91979f2bd 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -563,6 +563,8 @@ define i8 @srli_i8(i8 %a) nounwind {
   ret i8 %1
 }
 
+; We could use sext.b+srai, but slli+srai offers more opportunities for
+; compressed instructions.
 define i8 @srai_i8(i8 %a) nounwind {
 ; RV64I-LABEL: srai_i8:
 ; RV64I:       # %bb.0:
@@ -572,8 +574,8 @@ define i8 @srai_i8(i8 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: srai_i8:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sext.b a0, a0
-; RV64ZBB-NEXT:    srai a0, a0, 5
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srai a0, a0, 61
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: srai_i8:
@@ -609,6 +611,8 @@ define i16 @srli_i16(i16 %a) nounwind {
   ret i16 %1
 }
 
+; We could use sext.h+srai, but slli+srai offers more opportunities for
+; compressed instructions.
 define i16 @srai_i16(i16 %a) nounwind {
 ; RV64I-LABEL: srai_i16:
 ; RV64I:       # %bb.0:
@@ -618,8 +622,8 @@ define i16 @srai_i16(i16 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: srai_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    sext.h a0, a0
-; RV64ZBB-NEXT:    srai a0, a0, 9
+; RV64ZBB-NEXT:    slli a0, a0, 48
+; RV64ZBB-NEXT:    srai a0, a0, 57
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: srai_i16:


        

